This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new b00f4e09ab Update docs for datatypes (#5260)
b00f4e09ab is described below
commit b00f4e09abbcaf2fabd742c1e8af331bd6841cf1
Author: Jeffrey <[email protected]>
AuthorDate: Mon Jan 1 06:16:22 2024 +1100
Update docs for datatypes (#5260)
* Update docs for datatypes
* Update docs
* Fix reinterpret_cast doc
* Update arrow-schema/src/datatype.rs
Co-authored-by: Andrew Lamb <[email protected]>
---------
Co-authored-by: Raphael Taylor-Davies
<[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
---
arrow-array/src/array/primitive_array.rs | 5 ++---
arrow-schema/src/datatype.rs | 34 +++++++++++++++++++-------------
2 files changed, 22 insertions(+), 17 deletions(-)
diff --git a/arrow-array/src/array/primitive_array.rs
b/arrow-array/src/array/primitive_array.rs
index 2296cebd46..a6d5c22772 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -713,9 +713,8 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
/// the semantic values of the array, e.g. 100 milliseconds in a
[`TimestampNanosecondArray`]
/// will become 100 seconds in a [`TimestampSecondArray`].
///
- /// For casts that preserve the semantic value, check out the [compute
kernels]
- ///
- /// [compute
kernels](https://docs.rs/arrow/latest/arrow/compute/kernels/cast/index.html)
+ /// For casts that preserve the semantic value, check out the
+ /// [compute
kernels](https://docs.rs/arrow/latest/arrow/compute/kernels/cast/index.html).
///
/// ```
/// # use arrow_array::{Int64Array, TimestampNanosecondArray};
diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs
index 8ba52200c4..6276a99a47 100644
--- a/arrow-schema/src/datatype.rs
+++ b/arrow-schema/src/datatype.rs
@@ -23,14 +23,18 @@ use crate::{Field, FieldRef, Fields, UnionFields};
/// The set of datatypes that are supported by this implementation of Apache
Arrow.
///
/// The Arrow specification on data types includes some more types.
-/// See also
[`Schema.fbs`](https://github.com/apache/arrow/blob/master/format/Schema.fbs)
+/// See also
[`Schema.fbs`](https://github.com/apache/arrow/blob/main/format/Schema.fbs)
/// for Arrow's specification.
///
/// The variants of this enum include primitive fixed size types as well as
parametric or
/// nested types.
-/// Currently the Rust implementation supports the following nested types:
+/// Currently the Rust implementation supports the following nested types:
/// - `List<T>`
+/// - `LargeList<T>`
+/// - `FixedSizeList<T>`
/// - `Struct<T, U, V, ...>`
+/// - `Union<T, U, V, ...>`
+/// - `Map<K, V>`
///
/// Nested types can themselves be nested within other arrays.
/// For more information on these types please see
@@ -68,7 +72,7 @@ pub enum DataType {
///
/// Time is measured as a Unix epoch, counting the seconds from
/// 00:00:00.000 on 1 January 1970, excluding leap seconds,
- /// as a 64-bit integer.
+ /// as a signed 64-bit integer.
///
/// The time zone is a string indicating the name of a time zone, one of:
///
@@ -140,15 +144,17 @@ pub enum DataType {
/// DataType::Timestamp(TimeUnit::Second,
Some("string".to_string().into()));
/// ```
Timestamp(TimeUnit, Option<Arc<str>>),
- /// A 32-bit date representing the elapsed time since UNIX epoch
(1970-01-01)
+ /// A signed 32-bit date representing the elapsed time since UNIX epoch
(1970-01-01)
/// in days (32 bits).
Date32,
- /// A 64-bit date representing the elapsed time since UNIX epoch
(1970-01-01)
+ /// A signed 64-bit date representing the elapsed time since UNIX epoch
(1970-01-01)
/// in milliseconds (64 bits). Values are evenly divisible by 86400000.
Date64,
- /// A 32-bit time representing the elapsed time since midnight in the unit
of `TimeUnit`.
+ /// A signed 32-bit time representing the elapsed time since midnight in
the unit of `TimeUnit`.
+ /// Must be either seconds or milliseconds.
Time32(TimeUnit),
- /// A 64-bit time representing the elapsed time since midnight in the unit
of `TimeUnit`.
+ /// A signed 64-bit time representing the elapsed time since midnight in
the unit of `TimeUnit`.
+ /// Must be either microseconds or nanoseconds.
Time64(TimeUnit),
/// Measure of elapsed time in either seconds, milliseconds, microseconds
or nanoseconds.
Duration(TimeUnit),
@@ -159,7 +165,7 @@ pub enum DataType {
/// Opaque binary data of variable length.
///
/// A single Binary array can store up to [`i32::MAX`] bytes
- /// of binary data in total
+ /// of binary data in total.
Binary,
/// Opaque binary data of fixed size.
/// Enum parameter specifies the number of bytes per value.
@@ -167,27 +173,27 @@ pub enum DataType {
/// Opaque binary data of variable length and 64-bit offsets.
///
/// A single LargeBinary array can store up to [`i64::MAX`] bytes
- /// of binary data in total
+ /// of binary data in total.
LargeBinary,
- /// A variable-length string in Unicode with UTF-8 encoding
+ /// A variable-length string in Unicode with UTF-8 encoding.
///
/// A single Utf8 array can store up to [`i32::MAX`] bytes
- /// of string data in total
+ /// of string data in total.
Utf8,
/// A variable-length string in Unicode with UTF-8 encoding and 64-bit
offsets.
///
/// A single LargeUtf8 array can store up to [`i64::MAX`] bytes
- /// of string data in total
+ /// of string data in total.
LargeUtf8,
/// A list of some logical data type with variable length.
///
- /// A single List array can store up to [`i32::MAX`] elements in total
+ /// A single List array can store up to [`i32::MAX`] elements in total.
List(FieldRef),
/// A list of some logical data type with fixed length.
FixedSizeList(FieldRef, i32),
/// A list of some logical data type with variable length and 64-bit
offsets.
///
- /// A single LargeList array can store up to [`i64::MAX`] elements in total
+ /// A single LargeList array can store up to [`i64::MAX`] elements in
total.
LargeList(FieldRef),
/// A nested datatype that contains a number of sub-fields.
Struct(Fields),