alamb commented on code in PR #4197:
URL: https://github.com/apache/arrow-rs/pull/4197#discussion_r1190278778
##########
arrow-array/src/array/list_array.rs:
##########
@@ -28,7 +28,7 @@ use num::Integer;
use std::any::Any;
use std::sync::Arc;
-/// trait declaring an offset size, relevant for i32 vs i64 array types.
+/// A type that can be used within a variable-size array to encode offset
information
Review Comment:
I wonder if it is worth linking to StringArray, LargeStringArray, Lists, etc
##########
arrow-array/src/array/list_array.rs:
##########
@@ -475,8 +471,8 @@ impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for
GenericListArray<OffsetSiz
/// ```
pub type ListArray = GenericListArray<i32>;
-/// A list array where each element is a variable-sized sequence of values
with the same
-/// type whose memory offsets between elements are represented by a i64.
+/// An array of variable size arrays, storing offsets as `i64`.
Review Comment:
Same comment as above
```suggestion
/// An array of variable size lists, storing offsets as `i64`.
```
##########
arrow-array/src/array/primitive_array.rs:
##########
@@ -157,82 +181,99 @@ pub type Float64Array = PrimitiveArray<Float64Type>;
/// ```
///
pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
-/// A primitive array where each element is of type `TimestampMillisecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in milliseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
-/// A primitive array where each element is of type `TimestampMicrosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in microseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
-/// A primitive array where each element is of type `TimestampNanosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in nanoseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
// TODO: give examples for the below types
-/// A primitive array where each element is of 32-bit value
-/// representing the elapsed time since UNIX epoch in days."
+/// An array of 32-bit values representing the elapsed time
+/// since UNIX epoch in days
///
/// This type is similar to the [`chrono::NaiveDate`] type and can hold
/// values such as `2018-11-13`
pub type Date32Array = PrimitiveArray<Date32Type>;
-/// A primitive array where each element is a 64-bit value
-/// representing the elapsed time since the UNIX epoch in milliseconds.
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in milliseconds
///
-/// This type is similar to the [`chrono::NaiveDateTime`] type and can hold
-/// values such as `2018-11-13T17:11:10.011`
+/// This type is similar to the [`chrono::NaiveDate`] type and can hold
+/// values such as `2018-11-13`
pub type Date64Array = PrimitiveArray<Date64Type>;
-/// An array where each element is of 32-bit type representing time elapsed in
seconds
-/// since midnight.
+/// An array of 32-bit values representing the elapsed time
+/// since midnight in seconds
Review Comment:
```suggestion
/// An array of 32-bit values representing the seconds elapsed since
/// midnight
```
##########
arrow-array/src/array/primitive_array.rs:
##########
@@ -157,82 +181,99 @@ pub type Float64Array = PrimitiveArray<Float64Type>;
/// ```
///
pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
-/// A primitive array where each element is of type `TimestampMillisecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in milliseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
-/// A primitive array where each element is of type `TimestampMicrosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in microseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
-/// A primitive array where each element is of type `TimestampNanosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in nanoseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
// TODO: give examples for the below types
-/// A primitive array where each element is of 32-bit value
-/// representing the elapsed time since UNIX epoch in days."
+/// An array of 32-bit values representing the elapsed time
+/// since UNIX epoch in days
///
/// This type is similar to the [`chrono::NaiveDate`] type and can hold
/// values such as `2018-11-13`
pub type Date32Array = PrimitiveArray<Date32Type>;
-/// A primitive array where each element is a 64-bit value
-/// representing the elapsed time since the UNIX epoch in milliseconds.
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in milliseconds
///
-/// This type is similar to the [`chrono::NaiveDateTime`] type and can hold
-/// values such as `2018-11-13T17:11:10.011`
+/// This type is similar to the [`chrono::NaiveDate`] type and can hold
+/// values such as `2018-11-13`
pub type Date64Array = PrimitiveArray<Date64Type>;
-/// An array where each element is of 32-bit type representing time elapsed in
seconds
-/// since midnight.
+/// An array of 32-bit values representing the elapsed time
+/// since midnight in seconds
///
/// This type is similar to the [`chrono::NaiveTime`] type and can
/// hold values such as `00:02:00`
pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
-/// An array where each element is of 32-bit type representing time elapsed in
milliseconds
-/// since midnight.
+
+/// An array of 32-bit values representing the elapsed time
+/// since midnight in milliseconds
///
/// This type is similar to the [`chrono::NaiveTime`] type and can
/// hold values such as `00:02:00.123`
pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>;
-/// An array where each element is of 64-bit type representing time elapsed in
microseconds
-/// since midnight.
+
+/// An array of 64-bit values representing the elapsed time
+/// since midnight in microseconds
Review Comment:
```suggestion
/// An array of 64-bit values representing the elapsed microseconds since
midnight
```
##########
arrow-buffer/src/buffer/scalar.rs:
##########
@@ -22,12 +22,24 @@ use std::fmt::Formatter;
use std::marker::PhantomData;
use std::ops::Deref;
-/// Provides a safe API for interpreting a [`Buffer`] as a slice of
[`ArrowNativeType`]
+/// A strongly-typed [`Buffer`] supporting zero-copy cloning and slicing
///
-/// # Safety
+/// The easiest way to think about `ScalarBuffer<T>` is being equivalent to a
`Arc<Vec<T>>`,
Review Comment:
This is a very helpful comparison. Thank you
##########
arrow-array/src/array/struct_array.rs:
##########
@@ -22,9 +22,9 @@ use arrow_schema::{ArrowError, DataType, Field, FieldRef,
Fields, SchemaBuilder}
use std::sync::Arc;
use std::{any::Any, ops::Index};
-/// A nested array type where each child (called *field*) is represented by a
separate
-/// array.
+/// An array of
[tuples](https://arrow.apache.org/docs/format/Columnar.html#struct-layout)
Review Comment:
It is an array of structs, right?
```suggestion
/// An array of
[structs](https://arrow.apache.org/docs/format/Columnar.html#struct-layout)
```
##########
arrow-array/src/lib.rs:
##########
@@ -91,63 +55,114 @@
//!
//! // Append a single primitive value
//! builder.append_value(1);
-//!
//! // Append a null value
//! builder.append_null();
-//!
//! // Append a slice of primitive values
//! builder.append_slice(&[2, 3, 4]);
//!
//! // Build the array
//! let array = builder.finish();
//!
-//! assert_eq!(
-//! 5,
-//! array.len(),
-//! "The array has 5 values, counting the null value"
-//! );
+//! assert_eq!(5, array.len());
+//! assert_eq!(2, array.value(2));
+//! assert_eq!(&array.values()[3..5], &[3, 4])
+//! ```
//!
-//! assert_eq!(2, array.value(2), "Get the value with index 2");
+//! # Low-level API
+//!
+//! Internally, arrays consist of one or more shared memory regions backed by
[`Buffer`],
Review Comment:
Backed by "a" buffer?
##########
arrow-array/src/lib.rs:
##########
Review Comment:
I reviewed the content in this module in its rendered form:
<img width="1130" alt="Screenshot 2023-05-10 at 6 29 56 PM"
src="https://github.com/apache/arrow-rs/assets/490673/9dd729bf-ff61-4ac9-99eb-2947c246aef8">
Love it
##########
arrow-array/src/array/primitive_array.rs:
##########
@@ -157,82 +181,99 @@ pub type Float64Array = PrimitiveArray<Float64Type>;
/// ```
///
pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
-/// A primitive array where each element is of type `TimestampMillisecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in milliseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
-/// A primitive array where each element is of type `TimestampMicrosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in microseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
-/// A primitive array where each element is of type `TimestampNanosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in nanoseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
// TODO: give examples for the below types
-/// A primitive array where each element is of 32-bit value
-/// representing the elapsed time since UNIX epoch in days."
+/// An array of 32-bit values representing the elapsed time
+/// since UNIX epoch in days
///
/// This type is similar to the [`chrono::NaiveDate`] type and can hold
/// values such as `2018-11-13`
pub type Date32Array = PrimitiveArray<Date32Type>;
-/// A primitive array where each element is a 64-bit value
-/// representing the elapsed time since the UNIX epoch in milliseconds.
+
+/// An array of 64-bit values representing the elapsed time
Review Comment:
Likewise, "elapsed milliseconds since the UNIX epoch"
##########
arrow-array/src/array/list_array.rs:
##########
@@ -447,8 +444,7 @@ impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for
GenericListArray<OffsetSiz
}
}
-/// A list array where each element is a variable-sized sequence of values
with the same
-/// type whose memory offsets between elements are represented by a i32.
+/// An array of variable size arrays, storing offsets as `i32`.
Review Comment:
I found the two uses of `array` that mean different things (Arrow Arrays and
the lists) was confuing
Perhaps using the term `lists` would be less confusing
```suggestion
/// An array of variable size lists, storing offsets as `i32`.
```
##########
arrow-array/src/array/fixed_size_binary_array.rs:
##########
@@ -25,7 +25,7 @@ use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::sync::Arc;
-/// An array where each element is a fixed-size sequence of bytes.
+/// An array of [fixed size binary
arrays](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout)
Review Comment:
the link to primitive arrays seems strange for fixed size binary, but I
looked around on the docs and I didn't see any better section 🤷
##########
arrow-array/src/array/primitive_array.rs:
##########
@@ -157,82 +181,99 @@ pub type Float64Array = PrimitiveArray<Float64Type>;
/// ```
///
pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
-/// A primitive array where each element is of type `TimestampMillisecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in milliseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
-/// A primitive array where each element is of type `TimestampMicrosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in microseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
-/// A primitive array where each element is of type `TimestampNanosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in nanoseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
// TODO: give examples for the below types
-/// A primitive array where each element is of 32-bit value
-/// representing the elapsed time since UNIX epoch in days."
+/// An array of 32-bit values representing the elapsed time
Review Comment:
I feel like the term "time" here might be confusing -- what about using
"elapsed days since the UNIX epoch"?
##########
arrow-array/src/array/primitive_array.rs:
##########
@@ -157,82 +181,99 @@ pub type Float64Array = PrimitiveArray<Float64Type>;
/// ```
///
pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
-/// A primitive array where each element is of type `TimestampMillisecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in milliseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
-/// A primitive array where each element is of type `TimestampMicrosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in microseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
-/// A primitive array where each element is of type `TimestampNanosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in nanoseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
// TODO: give examples for the below types
-/// A primitive array where each element is of 32-bit value
-/// representing the elapsed time since UNIX epoch in days."
+/// An array of 32-bit values representing the elapsed time
+/// since UNIX epoch in days
///
/// This type is similar to the [`chrono::NaiveDate`] type and can hold
/// values such as `2018-11-13`
pub type Date32Array = PrimitiveArray<Date32Type>;
-/// A primitive array where each element is a 64-bit value
-/// representing the elapsed time since the UNIX epoch in milliseconds.
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in milliseconds
///
-/// This type is similar to the [`chrono::NaiveDateTime`] type and can hold
-/// values such as `2018-11-13T17:11:10.011`
+/// This type is similar to the [`chrono::NaiveDate`] type and can hold
+/// values such as `2018-11-13`
pub type Date64Array = PrimitiveArray<Date64Type>;
-/// An array where each element is of 32-bit type representing time elapsed in
seconds
-/// since midnight.
+/// An array of 32-bit values representing the elapsed time
+/// since midnight in seconds
///
/// This type is similar to the [`chrono::NaiveTime`] type and can
/// hold values such as `00:02:00`
pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
-/// An array where each element is of 32-bit type representing time elapsed in
milliseconds
-/// since midnight.
+
+/// An array of 32-bit values representing the elapsed time
+/// since midnight in milliseconds
Review Comment:
```suggestion
/// An array of 32-bit values representing the milliseconds since midnight
```
##########
arrow-array/src/array/primitive_array.rs:
##########
@@ -157,82 +181,99 @@ pub type Float64Array = PrimitiveArray<Float64Type>;
/// ```
///
pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
-/// A primitive array where each element is of type `TimestampMillisecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in milliseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
-/// A primitive array where each element is of type `TimestampMicrosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in microseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
-/// A primitive array where each element is of type `TimestampNanosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in nanoseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
// TODO: give examples for the below types
-/// A primitive array where each element is of 32-bit value
-/// representing the elapsed time since UNIX epoch in days."
+/// An array of 32-bit values representing the elapsed time
+/// since UNIX epoch in days
///
/// This type is similar to the [`chrono::NaiveDate`] type and can hold
/// values such as `2018-11-13`
pub type Date32Array = PrimitiveArray<Date32Type>;
-/// A primitive array where each element is a 64-bit value
-/// representing the elapsed time since the UNIX epoch in milliseconds.
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in milliseconds
///
-/// This type is similar to the [`chrono::NaiveDateTime`] type and can hold
-/// values such as `2018-11-13T17:11:10.011`
+/// This type is similar to the [`chrono::NaiveDate`] type and can hold
+/// values such as `2018-11-13`
pub type Date64Array = PrimitiveArray<Date64Type>;
-/// An array where each element is of 32-bit type representing time elapsed in
seconds
-/// since midnight.
+/// An array of 32-bit values representing the elapsed time
+/// since midnight in seconds
///
/// This type is similar to the [`chrono::NaiveTime`] type and can
/// hold values such as `00:02:00`
pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
-/// An array where each element is of 32-bit type representing time elapsed in
milliseconds
-/// since midnight.
+
+/// An array of 32-bit values representing the elapsed time
+/// since midnight in milliseconds
Review Comment:
```suggestion
/// An array of 32-bit values representing the milliseconds since midnight
```
##########
arrow-array/src/array/primitive_array.rs:
##########
@@ -157,82 +181,99 @@ pub type Float64Array = PrimitiveArray<Float64Type>;
/// ```
///
pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
-/// A primitive array where each element is of type `TimestampMillisecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in milliseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
-/// A primitive array where each element is of type `TimestampMicrosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in microseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
-/// A primitive array where each element is of type `TimestampNanosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in nanoseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
// TODO: give examples for the below types
-/// A primitive array where each element is of 32-bit value
-/// representing the elapsed time since UNIX epoch in days."
+/// An array of 32-bit values representing the elapsed time
+/// since UNIX epoch in days
///
/// This type is similar to the [`chrono::NaiveDate`] type and can hold
/// values such as `2018-11-13`
pub type Date32Array = PrimitiveArray<Date32Type>;
-/// A primitive array where each element is a 64-bit value
-/// representing the elapsed time since the UNIX epoch in milliseconds.
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in milliseconds
///
-/// This type is similar to the [`chrono::NaiveDateTime`] type and can hold
-/// values such as `2018-11-13T17:11:10.011`
+/// This type is similar to the [`chrono::NaiveDate`] type and can hold
+/// values such as `2018-11-13`
pub type Date64Array = PrimitiveArray<Date64Type>;
-/// An array where each element is of 32-bit type representing time elapsed in
seconds
-/// since midnight.
+/// An array of 32-bit values representing the elapsed time
+/// since midnight in seconds
///
/// This type is similar to the [`chrono::NaiveTime`] type and can
/// hold values such as `00:02:00`
pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
-/// An array where each element is of 32-bit type representing time elapsed in
milliseconds
-/// since midnight.
+
+/// An array of 32-bit values representing the elapsed time
+/// since midnight in milliseconds
Review Comment:
```suggestion
/// An array of 32-bit values representing the elapsed milliseconds since
midnight
```
##########
arrow-array/src/array/primitive_array.rs:
##########
@@ -157,82 +181,99 @@ pub type Float64Array = PrimitiveArray<Float64Type>;
/// ```
///
pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
-/// A primitive array where each element is of type `TimestampMillisecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in milliseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
-/// A primitive array where each element is of type `TimestampMicrosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in microseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
-/// A primitive array where each element is of type `TimestampNanosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in nanoseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
// TODO: give examples for the below types
-/// A primitive array where each element is of 32-bit value
-/// representing the elapsed time since UNIX epoch in days."
+/// An array of 32-bit values representing the elapsed time
+/// since UNIX epoch in days
///
/// This type is similar to the [`chrono::NaiveDate`] type and can hold
/// values such as `2018-11-13`
pub type Date32Array = PrimitiveArray<Date32Type>;
-/// A primitive array where each element is a 64-bit value
-/// representing the elapsed time since the UNIX epoch in milliseconds.
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in milliseconds
///
-/// This type is similar to the [`chrono::NaiveDateTime`] type and can hold
-/// values such as `2018-11-13T17:11:10.011`
+/// This type is similar to the [`chrono::NaiveDate`] type and can hold
+/// values such as `2018-11-13`
pub type Date64Array = PrimitiveArray<Date64Type>;
-/// An array where each element is of 32-bit type representing time elapsed in
seconds
-/// since midnight.
+/// An array of 32-bit values representing the elapsed time
+/// since midnight in seconds
///
/// This type is similar to the [`chrono::NaiveTime`] type and can
/// hold values such as `00:02:00`
pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
-/// An array where each element is of 32-bit type representing time elapsed in
milliseconds
-/// since midnight.
+
+/// An array of 32-bit values representing the elapsed time
+/// since midnight in milliseconds
///
/// This type is similar to the [`chrono::NaiveTime`] type and can
/// hold values such as `00:02:00.123`
pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>;
-/// An array where each element is of 64-bit type representing time elapsed in
microseconds
-/// since midnight.
+
+/// An array of 64-bit values representing the elapsed time
+/// since midnight in microseconds
Review Comment:
```suggestion
/// An array of 64-bit values representing the elapsed microseconds since
midnight
```
##########
arrow-array/src/array/primitive_array.rs:
##########
@@ -157,82 +181,99 @@ pub type Float64Array = PrimitiveArray<Float64Type>;
/// ```
///
pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
-/// A primitive array where each element is of type `TimestampMillisecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in milliseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
-/// A primitive array where each element is of type `TimestampMicrosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in microseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
-/// A primitive array where each element is of type `TimestampNanosecondType.`
-/// See examples for
[`TimestampSecondArray.`](crate::array::TimestampSecondArray)
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in nanoseconds
+///
+/// See examples for [`TimestampSecondArray`]
pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
// TODO: give examples for the below types
-/// A primitive array where each element is of 32-bit value
-/// representing the elapsed time since UNIX epoch in days."
+/// An array of 32-bit values representing the elapsed time
+/// since UNIX epoch in days
///
/// This type is similar to the [`chrono::NaiveDate`] type and can hold
/// values such as `2018-11-13`
pub type Date32Array = PrimitiveArray<Date32Type>;
-/// A primitive array where each element is a 64-bit value
-/// representing the elapsed time since the UNIX epoch in milliseconds.
+
+/// An array of 64-bit values representing the elapsed time
+/// since UNIX epoch in milliseconds
///
-/// This type is similar to the [`chrono::NaiveDateTime`] type and can hold
-/// values such as `2018-11-13T17:11:10.011`
+/// This type is similar to the [`chrono::NaiveDate`] type and can hold
+/// values such as `2018-11-13`
pub type Date64Array = PrimitiveArray<Date64Type>;
-/// An array where each element is of 32-bit type representing time elapsed in
seconds
-/// since midnight.
+/// An array of 32-bit values representing the elapsed time
+/// since midnight in seconds
///
/// This type is similar to the [`chrono::NaiveTime`] type and can
/// hold values such as `00:02:00`
pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
-/// An array where each element is of 32-bit type representing time elapsed in
milliseconds
-/// since midnight.
+
+/// An array of 32-bit values representing the elapsed time
+/// since midnight in milliseconds
///
/// This type is similar to the [`chrono::NaiveTime`] type and can
/// hold values such as `00:02:00.123`
pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>;
-/// An array where each element is of 64-bit type representing time elapsed in
microseconds
-/// since midnight.
+
+/// An array of 64-bit values representing the elapsed time
+/// since midnight in microseconds
///
/// This type is similar to the [`chrono::NaiveTime`] type and can
/// hold values such as `00:02:00.123456`
pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>;
-/// An array where each element is of 64-bit type representing time elapsed in
nanoseconds
-/// since midnight.
+
+/// An array of 64-bit values representing the elapsed time
+/// since midnight in nanoseconds
Review Comment:
```suggestion
/// An array of 64-bit values representing the elapsed nanoseconds
/// since midnight
```
##########
arrow-array/src/lib.rs:
##########
@@ -91,63 +55,114 @@
//!
//! // Append a single primitive value
//! builder.append_value(1);
-//!
//! // Append a null value
//! builder.append_null();
-//!
//! // Append a slice of primitive values
//! builder.append_slice(&[2, 3, 4]);
//!
//! // Build the array
//! let array = builder.finish();
//!
-//! assert_eq!(
-//! 5,
-//! array.len(),
-//! "The array has 5 values, counting the null value"
-//! );
+//! assert_eq!(5, array.len());
+//! assert_eq!(2, array.value(2));
+//! assert_eq!(&array.values()[3..5], &[3, 4])
+//! ```
//!
-//! assert_eq!(2, array.value(2), "Get the value with index 2");
+//! # Low-level API
+//!
+//! Internally, arrays consist of one or more shared memory regions backed by
[`Buffer`],
+//! the number and meaning of which depend on the array’s data type, as
documented in
+//! the [Arrow specification].
+//!
+//! For example, the type [`Int16Array`] represents an array of 16-bit
integers and consists of:
+//!
+//! * An optional [`NullBuffer`] identifying any null values
+//! * A contiguous [`ScalarBuffer<i16>`]
+//!
+//! Similarly, the type [`StringArray`] represents an array of UTF-8 strings
and consists of:
+//!
+//! * An optional [`NullBuffer`] identifying any null values
+//! * An offsets [`ScalarBuffer<i32>`] identifying valid UTF-8 sequences
within the values buffer
+//! * A values [`Buffer`] of UTF-8 encoded string data
+//!
+//! Array constructors such as [`PrimitiveArray::try_new`] provide the ability
to cheaply
+//! construct an array from these parts, with functions such as
[`PrimitiveArray::into_parts`]
+//! providing the reverse operation.
//!
-//! assert_eq!(
-//! &array.values()[3..5],
-//! &[3, 4],
-//! "Get slice of len 2 starting at idx 3"
-//! )
//! ```
+//! # use arrow_array::{Array, Int32Array, StringArray};
+//! # use arrow_buffer::OffsetBuffer;
+//! #
+//! // Create a Int32Array from Vec without copying
+//! let array = Int32Array::new(vec![1, 2, 3].into(), None);
+//! assert_eq!(array.values(), &[1, 2, 3]);
+//! assert_eq!(array.null_count(), 0);
+//!
+//! // Create a StringArray from parts
+//! let offsets = OffsetBuffer::new(vec![0, 5, 10].into());
+//! let array = StringArray::new(offsets, b"helloworld".into(), None);
+//! let values: Vec<_> = array.iter().map(|x| x.unwrap()).collect();
+//! assert_eq!(values, &["hello", "world"]);
+//! ```
+//!
+//! As [`Buffer`], and its derivatives, can be created from [`Vec`] without
copying, this provides
+//! an efficient way to not only interoperate with other Rust code, but also
implement kernels
+//! optimised for the arrow data layout - e.g. by handling buffers instead of
values.
//!
//! # Zero-Copy Slicing
//!
//! Given an [`Array`] of arbitrary length, it is possible to create an owned
slice of this
//! data. Internally this just increments some ref-counts, and so is
incredibly cheap
//!
//! ```rust
-//! # use std::sync::Arc;
-//! # use arrow_array::{ArrayRef, Int32Array};
-//! let array = Arc::new(Int32Array::from_iter([1, 2, 3])) as ArrayRef;
+//! # use arrow_array::Int32Array;
+//! let array = Int32Array::from_iter([1, 2, 3]);
//!
//! // Slice with offset 1 and length 2
//! let sliced = array.slice(1, 2);
-//! let ints = sliced.as_any().downcast_ref::<Int32Array>().unwrap();
-//! assert_eq!(ints.values(), &[2, 3]);
+//! assert_eq!(sliced.values(), &[2, 3]);
//! ```
//!
-//! # Internal Representation
+//! # Downcasting an Array
//!
-//! Internally, arrays are represented by one or several [`Buffer`], the
number and meaning of
-//! which depend on the array’s data type, as documented in the [Arrow
specification].
+//! Arrays are often passed around as a dynamically typed [`&dyn Array`] or
[`ArrayRef`].
+//! For example, [`RecordBatch`](`crate::RecordBatch`) stores columns as
[`ArrayRef`].
//!
-//! For example, the type [`Int16Array`] represents an array of 16-bit
integers and consists of:
+//! Whilst these arrays can be passed directly to the [`compute`], [`csv`],
[`json`], etc... APIs,
+//! it is often the case that you wish to interact with the concrete arrays
directly.
//!
-//! * An optional [`NullBuffer`] identifying any null values
-//! * A contiguous [`Buffer`] of 16-bit integers
+//! This requires downcasting to the concrete type of the array:
//!
-//! Similarly, the type [`StringArray`] represents an array of UTF-8 strings
and consists of:
+//! ```
+//! # use arrow_array::{Array, Float32Array, Int32Array};
//!
-//! * An optional [`NullBuffer`] identifying any null values
-//! * An offsets [`Buffer`] of 32-bit integers identifying valid UTF-8
sequences within the values buffer
-//! * A values [`Buffer`] of UTF-8 encoded string data
+//! fn sum_int32(array: &dyn Array) -> i32 {
Review Comment:
it might help to add some additional context to what the function is doing:
```suggestion
//! // Safely downcast an `Array` to an `Int32Array` and compute the sum
//! // using native i32 values
//! fn sum_int32(array: &dyn Array) -> i32 {
```
##########
arrow-array/src/lib.rs:
##########
@@ -91,63 +55,114 @@
//!
//! // Append a single primitive value
//! builder.append_value(1);
-//!
//! // Append a null value
//! builder.append_null();
-//!
//! // Append a slice of primitive values
//! builder.append_slice(&[2, 3, 4]);
//!
//! // Build the array
//! let array = builder.finish();
//!
-//! assert_eq!(
-//! 5,
-//! array.len(),
-//! "The array has 5 values, counting the null value"
-//! );
+//! assert_eq!(5, array.len());
+//! assert_eq!(2, array.value(2));
+//! assert_eq!(&array.values()[3..5], &[3, 4])
+//! ```
//!
-//! assert_eq!(2, array.value(2), "Get the value with index 2");
+//! # Low-level API
+//!
+//! Internally, arrays consist of one or more shared memory regions backed by
[`Buffer`],
+//! the number and meaning of which depend on the array’s data type, as
documented in
+//! the [Arrow specification].
+//!
+//! For example, the type [`Int16Array`] represents an array of 16-bit
integers and consists of:
+//!
+//! * An optional [`NullBuffer`] identifying any null values
+//! * A contiguous [`ScalarBuffer<i16>`]
+//!
+//! Similarly, the type [`StringArray`] represents an array of UTF-8 strings
and consists of:
+//!
+//! * An optional [`NullBuffer`] identifying any null values
+//! * An offsets [`ScalarBuffer<i32>`] identifying valid UTF-8 sequences
within the values buffer
Review Comment:
Aren't the offsets actually an `OffsetBuffer`?
##########
arrow-array/src/lib.rs:
##########
@@ -91,63 +55,114 @@
//!
//! // Append a single primitive value
//! builder.append_value(1);
-//!
//! // Append a null value
//! builder.append_null();
-//!
//! // Append a slice of primitive values
//! builder.append_slice(&[2, 3, 4]);
//!
//! // Build the array
//! let array = builder.finish();
//!
-//! assert_eq!(
-//! 5,
-//! array.len(),
-//! "The array has 5 values, counting the null value"
-//! );
+//! assert_eq!(5, array.len());
+//! assert_eq!(2, array.value(2));
+//! assert_eq!(&array.values()[3..5], &[3, 4])
+//! ```
//!
-//! assert_eq!(2, array.value(2), "Get the value with index 2");
+//! # Low-level API
+//!
+//! Internally, arrays consist of one or more shared memory regions backed by
[`Buffer`],
+//! the number and meaning of which depend on the array’s data type, as
documented in
+//! the [Arrow specification].
+//!
+//! For example, the type [`Int16Array`] represents an array of 16-bit
integers and consists of:
+//!
+//! * An optional [`NullBuffer`] identifying any null values
+//! * A contiguous [`ScalarBuffer<i16>`]
+//!
+//! Similarly, the type [`StringArray`] represents an array of UTF-8 strings
and consists of:
+//!
+//! * An optional [`NullBuffer`] identifying any null values
+//! * An offsets [`ScalarBuffer<i32>`] identifying valid UTF-8 sequences
within the values buffer
+//! * A values [`Buffer`] of UTF-8 encoded string data
+//!
+//! Array constructors such as [`PrimitiveArray::try_new`] provide the ability
to cheaply
+//! construct an array from these parts, with functions such as
[`PrimitiveArray::into_parts`]
+//! providing the reverse operation.
//!
-//! assert_eq!(
-//! &array.values()[3..5],
-//! &[3, 4],
-//! "Get slice of len 2 starting at idx 3"
-//! )
//! ```
+//! # use arrow_array::{Array, Int32Array, StringArray};
+//! # use arrow_buffer::OffsetBuffer;
+//! #
+//! // Create a Int32Array from Vec without copying
+//! let array = Int32Array::new(vec![1, 2, 3].into(), None);
+//! assert_eq!(array.values(), &[1, 2, 3]);
+//! assert_eq!(array.null_count(), 0);
+//!
+//! // Create a StringArray from parts
+//! let offsets = OffsetBuffer::new(vec![0, 5, 10].into());
+//! let array = StringArray::new(offsets, b"helloworld".into(), None);
+//! let values: Vec<_> = array.iter().map(|x| x.unwrap()).collect();
+//! assert_eq!(values, &["hello", "world"]);
+//! ```
+//!
+//! As [`Buffer`], and its derivatives, can be created from [`Vec`] without
copying, this provides
+//! an efficient way to not only interoperate with other Rust code, but also
implement kernels
+//! optimised for the arrow data layout - e.g. by handling buffers instead of
values.
//!
//! # Zero-Copy Slicing
//!
//! Given an [`Array`] of arbitrary length, it is possible to create an owned
slice of this
//! data. Internally this just increments some ref-counts, and so is
incredibly cheap
//!
//! ```rust
-//! # use std::sync::Arc;
-//! # use arrow_array::{ArrayRef, Int32Array};
-//! let array = Arc::new(Int32Array::from_iter([1, 2, 3])) as ArrayRef;
+//! # use arrow_array::Int32Array;
+//! let array = Int32Array::from_iter([1, 2, 3]);
//!
//! // Slice with offset 1 and length 2
//! let sliced = array.slice(1, 2);
-//! let ints = sliced.as_any().downcast_ref::<Int32Array>().unwrap();
-//! assert_eq!(ints.values(), &[2, 3]);
+//! assert_eq!(sliced.values(), &[2, 3]);
//! ```
//!
-//! # Internal Representation
+//! # Downcasting an Array
//!
-//! Internally, arrays are represented by one or several [`Buffer`], the
number and meaning of
-//! which depend on the array’s data type, as documented in the [Arrow
specification].
+//! Arrays are often passed around as a dynamically typed [`&dyn Array`] or
[`ArrayRef`].
+//! For example, [`RecordBatch`](`crate::RecordBatch`) stores columns as
[`ArrayRef`].
//!
-//! For example, the type [`Int16Array`] represents an array of 16-bit
integers and consists of:
+//! Whilst these arrays can be passed directly to the [`compute`], [`csv`],
[`json`], etc... APIs,
+//! it is often the case that you wish to interact with the concrete arrays
directly.
//!
-//! * An optional [`NullBuffer`] identifying any null values
-//! * A contiguous [`Buffer`] of 16-bit integers
+//! This requires downcasting to the concrete type of the array:
//!
-//! Similarly, the type [`StringArray`] represents an array of UTF-8 strings
and consists of:
+//! ```
+//! # use arrow_array::{Array, Float32Array, Int32Array};
//!
-//! * An optional [`NullBuffer`] identifying any null values
-//! * An offsets [`Buffer`] of 32-bit integers identifying valid UTF-8
sequences within the values buffer
-//! * A values [`Buffer`] of UTF-8 encoded string data
+//! fn sum_int32(array: &dyn Array) -> i32 {
+//! let integers: &Int32Array = array.as_any().downcast_ref().unwrap();
+//! integers.iter().map(|val| val.unwrap_or_default()).sum()
+//! }
+//!
+//! // Note: the values for positions corresponding to nulls will be arbitrary
Review Comment:
```suggestion
//! // Safely downcasts the array to a `Float32Array` and returns a &[f32]
view of the data
//! // Note: the values for positions corresponding to nulls will be
arbitrary (but still valid f32)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]