This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new b3b814c  ARROW-4304: [Rust] Enhance documentation for arrow
b3b814c is described below

commit b3b814c141ee7cb88601c91ae5f75659a5a6b7fc
Author: Andreas Zimmerer <[email protected]>
AuthorDate: Mon Apr 6 11:35:42 2020 -0600

    ARROW-4304: [Rust] Enhance documentation for arrow
    
    Hi all,
    
    I hope I don't bother you too much with this.
    I recently started using the Rust implementation of Apache Arrow and it's 
going well so far! However, I noticed that the documentation is a bit sparse.
    
    I then dug a little bit around in Jira and noticed there are a bunch of 
issues targeting enhancement of documentation, most notably 
[ARROW-4304](https://issues.apache.org/jira/browse/ARROW-4304), 
[ARROW-4683](https://issues.apache.org/jira/browse/ARROW-4683) and 
[ARROW-4927](https://issues.apache.org/jira/browse/ARROW-4927).
    
    I then started documenting a few things and adding working doctests to it. 
I also updated some external links that were broken.
    
    It's far from complete but I guess it's a decent start for a wonderful 
documentation of this crate :blush:
    
    I am happy to discuss what I've written :innocent:
    
    - [x] tests passing (including doctests)
    - [x] code formatting passes
    - [x] link check (only for external links)
    
    Closes #6828 from Jibbow/arrow-doc
    
    Authored-by: Andreas Zimmerer <[email protected]>
    Signed-off-by: Andy Grove <[email protected]>
---
 rust/arrow/src/array/array.rs   | 152 +++++++++++++++++++++++++++++-----
 rust/arrow/src/array/builder.rs | 176 +++++++++++++++++++++++++++++++++++-----
 rust/arrow/src/array/mod.rs     |  33 +++++++-
 rust/arrow/src/datatypes.rs     |  88 ++++++++++++++++----
 rust/arrow/src/record_batch.rs  | 113 ++++++++++++++++++++++----
 5 files changed, 489 insertions(+), 73 deletions(-)

diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs
index 05620e3..9c04b16 100644
--- a/rust/arrow/src/array/array.rs
+++ b/rust/arrow/src/array/array.rs
@@ -45,57 +45,166 @@ const MICROSECONDS: i64 = 1_000_000;
 const NANOSECONDS: i64 = 1_000_000_000;
 
 /// Trait for dealing with different types of array at runtime when the type 
of the
-/// array is not known in advance
+/// array is not known in advance.
 pub trait Array: fmt::Debug + Send + Sync + ArrayEqual + JsonEqual {
-    /// Returns the array as `Any` so that it can be downcast to a specific 
implementation
+    /// Returns the array as [`Any`](std::any::Any) so that it can be
+    /// downcast to a specific implementation.
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use std::sync::Arc;
+    /// use arrow::array::Int32Array;
+    /// use arrow::datatypes::{Schema, Field, DataType};
+    /// use arrow::record_batch::RecordBatch;
+    ///
+    /// # fn main() -> arrow::error::Result<()> {
+    /// let id = Int32Array::from(vec![1, 2, 3, 4, 5]);
+    /// let batch = RecordBatch::try_new(
+    ///     Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, 
false)])),
+    ///     vec![Arc::new(id)]
+    /// )?;
+    ///
+    /// let int32array = batch
+    ///     .column(0)
+    ///     .as_any()
+    ///     .downcast_ref::<Int32Array>()
+    ///     .expect("Failed to downcast");
+    /// # Ok(())
+    /// # }
+    /// ```
     fn as_any(&self) -> &Any;
 
-    /// Returns a reference-counted pointer to the data of this array
+    /// Returns a reference-counted pointer to the underlying data of this 
array.
     fn data(&self) -> ArrayDataRef;
 
-    /// Returns a borrowed & reference-counted pointer to the data of this 
array
+    /// Returns a borrowed & reference-counted pointer to the underlying data 
of this array.
     fn data_ref(&self) -> &ArrayDataRef;
 
-    /// Returns a reference to the data type of this array
+    /// Returns a reference to the [`DataType`](crate::datatypes::DataType) of 
this array.
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::datatypes::DataType;
+    /// use arrow::array::{Array, Int32Array};
+    ///
+    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+    ///
+    /// assert_eq!(*array.data_type(), DataType::Int32);
+    /// ```
     fn data_type(&self) -> &DataType {
         self.data_ref().data_type()
     }
 
     /// Returns a zero-copy slice of this array with the indicated offset and 
length.
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::{Array, Int32Array};
+    ///
+    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+    /// // Make slice over the values [2, 3, 4]
+    /// let array_slice = array.slice(1, 3);
+    ///
+    /// assert!(array_slice.equals(&Int32Array::from(vec![2, 3, 4])));
+    /// ```
     fn slice(&self, offset: usize, length: usize) -> ArrayRef {
         make_array(slice_data(self.data(), offset, length))
     }
 
-    /// Returns the length (i.e., number of elements) of this array
+    /// Returns the length (i.e., number of elements) of this array.
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::{Array, Int32Array};
+    ///
+    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+    ///
+    /// assert_eq!(array.len(), 5);
+    /// ```
     fn len(&self) -> usize {
         self.data().len()
     }
 
-    /// Returns the offset of this array
+    /// Returns the offset into the underlying data used by this array(-slice).
+    /// Note that the underlying data can be shared by many arrays.
+    /// This defaults to `0`.
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::{Array, Int32Array};
+    ///
+    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+    /// // Make slice over the values [2, 3, 4]
+    /// let array_slice = array.slice(1, 3);
+    ///
+    /// assert_eq!(array.offset(), 0);
+    /// assert_eq!(array_slice.offset(), 1);
+    /// ```
     fn offset(&self) -> usize {
         self.data().offset()
     }
 
-    /// Returns whether the element at index `i` is null
-    fn is_null(&self, i: usize) -> bool {
-        self.data().is_null(self.data().offset() + i)
+    /// Returns whether the element at `index` is null.
+    /// When using this function on a slice, the index is relative to the 
slice.
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::{Array, Int32Array};
+    ///
+    /// let array = Int32Array::from(vec![Some(1), None]);
+    ///
+    /// assert_eq!(array.is_null(0), false);
+    /// assert_eq!(array.is_null(1), true);
+    /// ```
+    fn is_null(&self, index: usize) -> bool {
+        self.data().is_null(self.data().offset() + index)
     }
 
-    /// Returns whether the element at index `i` is not null
-    fn is_valid(&self, i: usize) -> bool {
-        self.data().is_valid(self.data().offset() + i)
+    /// Returns whether the element at `index` is not null.
+    /// When using this function on a slice, the index is relative to the 
slice.
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::{Array, Int32Array};
+    ///
+    /// let array = Int32Array::from(vec![Some(1), None]);
+    ///
+    /// assert_eq!(array.is_valid(0), true);
+    /// assert_eq!(array.is_valid(1), false);
+    /// ```
+    fn is_valid(&self, index: usize) -> bool {
+        self.data().is_valid(self.data().offset() + index)
     }
 
-    /// Returns the total number of nulls in this array
+    /// Returns the total number of null values in this array.
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::{Array, Int32Array};
+    ///
+    /// // Construct an array with values [1, NULL, NULL]
+    /// let array = Int32Array::from(vec![Some(1), None, None]);
+    ///
+    /// assert_eq!(array.null_count(), 2);
+    /// ```
     fn null_count(&self) -> usize {
         self.data().null_count()
     }
 }
 
+/// A reference-counted reference to a generic `Array`.
 pub type ArrayRef = Arc<Array>;
 
-/// Constructs an array using the input `data`. Returns a reference-counted 
`Array`
-/// instance.
+/// Constructs an array using the input `data`.
+/// Returns a reference-counted `Array` instance.
 pub fn make_array(data: ArrayDataRef) -> ArrayRef {
     match data.data_type() {
         DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef,
@@ -197,6 +306,11 @@ pub fn make_array(data: ArrayDataRef) -> ArrayRef {
     }
 }
 
+/// Creates a zero-copy slice of the array's data.
+///
+/// # Panics
+///
+/// Panics if `offset + length > data.len()`.
 fn slice_data(data: ArrayDataRef, mut offset: usize, length: usize) -> 
ArrayDataRef {
     assert!((offset + length) <= data.len());
 
@@ -316,14 +430,14 @@ impl<T: ArrowNumericType> PrimitiveArray<T> {
         PrimitiveArray::from(array_data)
     }
 
-    /// Returns a `Buffer` holds all the values of this array.
+    /// Returns a `Buffer` holding all the values of this array.
     ///
-    /// Note this doesn't take account into the offset of this array.
+    /// Note this doesn't take the offset of this array into account.
     pub fn values(&self) -> Buffer {
         self.data.buffers()[0].clone()
     }
 
-    /// Returns the length of this array
+    /// Returns the length of this array.
     pub fn len(&self) -> usize {
         self.data.len()
     }
diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs
index bd73dbd..00d9433 100644
--- a/rust/arrow/src/array/builder.rs
+++ b/rust/arrow/src/array/builder.rs
@@ -15,8 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Defines a `BufferBuilder` capable of creating a `Buffer` which can be used 
as an
-//! internal buffer in an `ArrayData` object.
+//! Defines a [`BufferBuilder`](crate::array::BufferBuilder) capable
+//! of creating a [`Buffer`](crate::buffer::Buffer) which can be used
+//! as an internal buffer in an [`ArrayData`](crate::array::ArrayData)
+//! object.
 
 use std::any::Any;
 use std::collections::HashMap;
@@ -31,29 +33,172 @@ use crate::datatypes::*;
 use crate::error::{ArrowError, Result};
 use crate::util::bit_util;
 
-/// Buffer builder with zero-copy build method
+/// Builder for creating a [`Buffer`](crate::buffer::Buffer) object.
+///
+/// This builder is implemented for primitive types and creates a
+/// buffer with a zero-copy `build()` method.
+///
+/// See trait [`BufferBuilderTrait`](crate::array::BufferBuilderTrait)
+/// for further documentation and examples.
+///
+/// A [`Buffer`](crate::buffer::Buffer) is the underlying data
+/// structure of Arrow's [`Arrays`](crate::array::Array).
+///
+/// For all supported types, there are type definitions for the
+/// generic version of `BufferBuilder<T>`, e.g. `UInt8BufferBuilder`.
+///
+/// # Example:
+///
+/// ```
+/// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
+///
+/// # fn main() -> arrow::error::Result<()> {
+/// let mut builder = UInt8BufferBuilder::new(100);
+/// builder.append_slice(&[42, 43, 44]);
+/// builder.append(45);
+/// let buffer = builder.finish();
+///
+/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
+/// # Ok(())
+/// # }
+/// ```
 pub struct BufferBuilder<T: ArrowPrimitiveType> {
     buffer: MutableBuffer,
     len: usize,
     _marker: PhantomData<T>,
 }
 
-// Trait for buffer builder. This is used mainly to offer separate 
implementations for
-// numeric types and boolean types, while still be able to call methods on 
buffer builder
-// with generic primitive type.
+/// Trait for simplifying the construction of 
[`Buffers`](crate::buffer::Buffer).
+///
+/// This trait is used mainly to offer separate implementations for
+/// numeric types and boolean types, while still being able to call methods on 
buffer builder
+/// with generic primitive type.
+/// Separate implementations of this trait allow adding implementation details,
+/// e.g. the implementation for boolean types uses bit-packing.
 pub trait BufferBuilderTrait<T: ArrowPrimitiveType> {
+    /// Creates a new builder with initial capacity for _at least_ `capacity`
+    /// elements of type `T`.
+    ///
+    /// The capacity can later be manually adjusted with the
+    /// [`reserve()`](BufferBuilderTrait::reserve) method.
+    /// Also the
+    /// [`append()`](BufferBuilderTrait::append),
+    /// [`append_slice()`](BufferBuilderTrait::append_slice) and
+    /// [`advance()`](BufferBuilderTrait::advance)
+    /// methods automatically increase the capacity if needed.
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
+    ///
+    /// let mut builder = UInt8BufferBuilder::new(10);
+    ///
+    /// assert!(builder.capacity() >= 10);
+    /// ```
     fn new(capacity: usize) -> Self;
+
+    /// Returns the current number of array elements in the internal buffer.
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
+    ///
+    /// let mut builder = UInt8BufferBuilder::new(10);
+    /// builder.append(42);
+    ///
+    /// assert_eq!(builder.len(), 1);
+    /// ```
     fn len(&self) -> usize;
+
+    /// Returns the actual capacity (number of elements) of the internal 
buffer.
+    ///
+    /// Note: the internal capacity returned by this method might be larger 
than
+    /// what you'd expect after setting the capacity in the `new()` or 
`reserve()`
+    /// functions.
     fn capacity(&self) -> usize;
-    fn advance(&mut self, i: usize) -> Result<()>;
+
+    /// Increases the number of elements in the internal buffer by `n`
+    /// and resizes the buffer as needed.
+    ///
+    /// The values of the newly added elements are undefined.
+    /// This method is usually used when appending `NULL` values to the buffer
+    /// as they still require physical memory space.
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
+    ///
+    /// let mut builder = UInt8BufferBuilder::new(10);
+    /// builder.advance(2);
+    ///
+    /// assert_eq!(builder.len(), 2);
+    /// ```
+    fn advance(&mut self, n: usize) -> Result<()>;
+
+    /// Reserves memory for _at least_ `n` more elements of type `T`.
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
+    ///
+    /// let mut builder = UInt8BufferBuilder::new(10);
+    /// builder.reserve(10);
+    ///
+    /// assert!(builder.capacity() >= 20);
+    /// ```
     fn reserve(&mut self, n: usize) -> Result<()>;
-    fn append(&mut self, v: T::Native) -> Result<()>;
+
+    /// Appends a value of type `T` into the builder,
+    /// growing the internal buffer as needed.
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
+    ///
+    /// let mut builder = UInt8BufferBuilder::new(10);
+    /// builder.append(42);
+    ///
+    /// assert_eq!(builder.len(), 1);
+    /// ```
+    fn append(&mut self, value: T::Native) -> Result<()>;
+
+    /// Appends a slice of type `T`, growing the internal buffer as needed.
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
+    ///
+    /// let mut builder = UInt8BufferBuilder::new(10);
+    /// builder.append_slice(&[42, 44, 46]);
+    ///
+    /// assert_eq!(builder.len(), 3);
+    /// ```
     fn append_slice(&mut self, slice: &[T::Native]) -> Result<()>;
+
+    /// Resets this builder and returns an immutable 
[`Buffer`](crate::buffer::Buffer).
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait};
+    ///
+    /// let mut builder = UInt8BufferBuilder::new(10);
+    /// builder.append_slice(&[42, 44, 46]);
+    ///
+    /// let buffer = builder.finish();
+    ///
+    /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
+    /// ```
     fn finish(&mut self) -> Buffer;
 }
 
 impl<T: ArrowPrimitiveType> BufferBuilderTrait<T> for BufferBuilder<T> {
-    /// Creates a builder with a fixed initial capacity
     default fn new(capacity: usize) -> Self {
         let buffer = MutableBuffer::new(capacity * 
mem::size_of::<T::Native>());
         Self {
@@ -63,18 +208,15 @@ impl<T: ArrowPrimitiveType> BufferBuilderTrait<T> for 
BufferBuilder<T> {
         }
     }
 
-    /// Returns the number of array elements (slots) in the builder
     fn len(&self) -> usize {
         self.len
     }
 
-    /// Returns the current capacity of the builder (number of elements)
     fn capacity(&self) -> usize {
         let bit_capacity = self.buffer.capacity() * 8;
         (bit_capacity / T::get_bit_width())
     }
 
-    // Advances the `len` of the underlying `Buffer` by `i` slots of type T
     default fn advance(&mut self, i: usize) -> Result<()> {
         let new_buffer_len = (self.len + i) * mem::size_of::<T::Native>();
         self.buffer.resize(new_buffer_len)?;
@@ -82,7 +224,6 @@ impl<T: ArrowPrimitiveType> BufferBuilderTrait<T> for 
BufferBuilder<T> {
         Ok(())
     }
 
-    /// Reserves memory for `n` elements of type `T`.
     default fn reserve(&mut self, n: usize) -> Result<()> {
         let new_capacity = self.len + n;
         let byte_capacity = mem::size_of::<T::Native>() * new_capacity;
@@ -90,20 +231,17 @@ impl<T: ArrowPrimitiveType> BufferBuilderTrait<T> for 
BufferBuilder<T> {
         Ok(())
     }
 
-    /// Appends a value into the builder, growing the internal buffer as 
needed.
     default fn append(&mut self, v: T::Native) -> Result<()> {
         self.reserve(1)?;
         self.write_bytes(v.to_byte_slice(), 1)
     }
 
-    /// Appends a slice of type `T`, growing the internal buffer as needed.
     default fn append_slice(&mut self, slice: &[T::Native]) -> Result<()> {
         let array_slots = slice.len();
         self.reserve(array_slots)?;
         self.write_bytes(slice.to_byte_slice(), array_slots)
     }
 
-    /// Reset this builder and returns an immutable `Buffer`.
     default fn finish(&mut self) -> Buffer {
         let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
         self.len = 0;
@@ -131,7 +269,6 @@ impl<T: ArrowPrimitiveType> BufferBuilder<T> {
 }
 
 impl BufferBuilderTrait<BooleanType> for BufferBuilder<BooleanType> {
-    /// Creates a builder with a fixed initial capacity.
     fn new(capacity: usize) -> Self {
         let byte_capacity = bit_util::ceil(capacity, 8);
         let actual_capacity = 
bit_util::round_upto_multiple_of_64(byte_capacity);
@@ -144,7 +281,6 @@ impl BufferBuilderTrait<BooleanType> for 
BufferBuilder<BooleanType> {
         }
     }
 
-    // Advances the `len` of the underlying `Buffer` by `i` slots of type T
     fn advance(&mut self, i: usize) -> Result<()> {
         let new_buffer_len = bit_util::ceil(self.len + i, 8);
         self.buffer.resize(new_buffer_len)?;
@@ -152,7 +288,6 @@ impl BufferBuilderTrait<BooleanType> for 
BufferBuilder<BooleanType> {
         Ok(())
     }
 
-    /// Appends a value into the builder, growing the internal buffer as 
needed.
     fn append(&mut self, v: bool) -> Result<()> {
         self.reserve(1)?;
         if v {
@@ -166,7 +301,6 @@ impl BufferBuilderTrait<BooleanType> for 
BufferBuilder<BooleanType> {
         Ok(())
     }
 
-    /// Appends a slice of type `T`, growing the internal buffer as needed.
     fn append_slice(&mut self, slice: &[bool]) -> Result<()> {
         self.reserve(slice.len())?;
         for v in slice {
@@ -183,7 +317,6 @@ impl BufferBuilderTrait<BooleanType> for 
BufferBuilder<BooleanType> {
         Ok(())
     }
 
-    /// Reserves memory for `n` elements of type `T`.
     fn reserve(&mut self, n: usize) -> Result<()> {
         let new_capacity = self.len + n;
         if new_capacity > self.capacity() {
@@ -196,7 +329,6 @@ impl BufferBuilderTrait<BooleanType> for 
BufferBuilder<BooleanType> {
         Ok(())
     }
 
-    /// Reset this builder and returns an immutable `Buffer`.
     fn finish(&mut self) -> Buffer {
         // `append` does not update the buffer's `len` so do it before 
`freeze` is called.
         let new_buffer_len = bit_util::ceil(self.len, 8);
diff --git a/rust/arrow/src/array/mod.rs b/rust/arrow/src/array/mod.rs
index fbe485a..bd3c2e3 100644
--- a/rust/arrow/src/array/mod.rs
+++ b/rust/arrow/src/array/mod.rs
@@ -15,10 +15,37 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Defines public types representing Apache Arrow arrays. Arrow's 
specification defines
-//! an array as "a sequence of values with known length all having the same 
type." For
-//! example, the type `Int16Array` represents an Apache Arrow array of 16-bit 
integers.
+//! The central types in Apache Arrow are arrays, represented
+//! by the [`Array` trait](crate::array::Array).
+//! An array represents a known-length sequence of values all
+//! having the same type.
 //!
+//! Internally, those values are represented by one or several
+//! [buffers](crate::buffer::Buffer), the number and meaning
+//! of which depend on the array’s data type, as documented in
+//! [the Arrow data layout 
specification](https://arrow.apache.org/docs/format/Columnar.html).
+//! For example, the type `Int16Array` represents an Apache
+//! Arrow array of 16-bit integers.
+//!
+//! Those buffers consist of the value data itself and an
+//! optional [bitmap buffer](crate::bitmap::Bitmap) that
+//! indicates which array entries are null values.
+//! The bitmap buffer can be entirely omitted if the array is
+//! known to have zero null values.
+//!
+//! There are concrete implementations of this trait for each
+//! data type, that help you access individual values of the
+//! array.
+//!
+//! # Building an Array
+//!
+//! Arrow's `Arrays` are immutable, but there is the trait
+//! [`ArrayBuilder`](crate::array::ArrayBuilder)
+//! that helps you with constructing new `Arrays`. As with the
+//! `Array` trait, there are builder implementations for all
+//! concrete array types.
+//!
+//! # Example
 //! ```
 //! extern crate arrow;
 //!
diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs
index 0ee8cce..03d9dca 100644
--- a/rust/arrow/src/datatypes.rs
+++ b/rust/arrow/src/datatypes.rs
@@ -15,11 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Defines the data-types of Arrow arrays.
+//! Defines the logical data types of Arrow arrays.
 //!
-//! For an overview of the terminology used within the arrow project and more 
general
-//! information regarding data-types and memory layouts see
-//! [here](https://arrow.apache.org/docs/memory_layout.html).
+//! The most important things you might be looking for are:
+//!  * [`Schema`](crate::datatypes::Schema) to describe a schema.
+//!  * [`Field`](crate::datatypes::Field) to describe one field within a 
schema.
+//!  * [`DataType`](crate::datatypes::DataType) to describe the type of a 
field.
 
 use std::collections::HashMap;
 use std::fmt;
@@ -39,7 +40,11 @@ use serde_json::{
 
 use crate::error::{ArrowError, Result};
 
-/// The possible relative types that are supported.
+/// The set of datatypes that are supported by this implementation of Apache 
Arrow.
+///
+/// The Arrow specification on data types includes some more types.
+/// See also 
[`Schema.fbs`](https://github.com/apache/arrow/blob/master/format/Schema.fbs)
+/// for Arrow's specification.
 ///
 /// The variants of this enum include primitive fixed size types as well as 
parametric or
 /// nested types.
@@ -49,55 +54,108 @@ use crate::error::{ArrowError, Result};
 ///
 /// Nested types can themselves be nested within other arrays.
 /// For more information on these types please see
-/// [here](https://arrow.apache.org/docs/memory_layout.html).
+/// [the physical memory layout of Apache 
Arrow](https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout).
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, 
PartialOrd, Ord)]
 pub enum DataType {
+    /// A boolean datatype representing the values `true` and `false`.
     Boolean,
+    /// A signed 8-bit integer.
     Int8,
+    /// A signed 16-bit integer.
     Int16,
+    /// A signed 32-bit integer.
     Int32,
+    /// A signed 64-bit integer.
     Int64,
+    /// An unsigned 8-bit integer.
     UInt8,
+    /// An unsigned 16-bit integer.
     UInt16,
+    /// An unsigned 32-bit integer.
     UInt32,
+    /// An unsigned 64-bit integer.
     UInt64,
+    /// A 16-bit floating point number.
     Float16,
+    /// A 32-bit floating point number.
     Float32,
+    /// A 64-bit floating point number.
     Float64,
-    /// A timestamp with an optional timezone
+    /// A timestamp with an optional timezone.
+    ///
+    /// Time is measured relative to the Unix epoch, counting the elapsed
+    /// units of `TimeUnit` since 00:00:00.000 on 1 January 1970,
+    /// excluding leap seconds, as a 64-bit integer.
+    ///
+    /// The time zone is a string indicating the name of a time zone, one of:
+    ///
+    /// * As used in the Olson time zone database (the "tz database" or
+    ///   "tzdata"), such as "America/New_York"
+    /// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as 
+07:30
     Timestamp(TimeUnit, Option<Arc<String>>),
+    /// A 32-bit date representing the elapsed time since UNIX epoch 
(1970-01-01)
+    /// in days (32 bits).
     Date32(DateUnit),
+    /// A 64-bit date representing the elapsed time since UNIX epoch 
(1970-01-01)
+    /// in milliseconds (64 bits).
     Date64(DateUnit),
+    /// A 32-bit time representing the elapsed time since midnight in the unit 
of `TimeUnit`.
     Time32(TimeUnit),
+    /// A 64-bit time representing the elapsed time since midnight in the unit 
of `TimeUnit`.
     Time64(TimeUnit),
+    /// Measure of elapsed time in either seconds, milliseconds, microseconds 
or nanoseconds.
     Duration(TimeUnit),
+    /// A "calendar" interval which models types that don't necessarily
+    /// have a precise duration without the context of a base timestamp (e.g.
+    /// days can differ in length during day light savings time transitions).
     Interval(IntervalUnit),
+    /// Opaque binary data of variable length.
     Binary,
+    /// Opaque binary data of fixed size.
+    /// Enum parameter specifies the number of bytes per value.
     FixedSizeBinary(i32),
+    /// A variable-length string in Unicode with UTF-8 encoding.
     Utf8,
+    /// A list of some logical data type with variable length.
     List(Box<DataType>),
+    /// A list of some logical data type with fixed length.
     FixedSizeList(Box<DataType>, i32),
+    /// A nested datatype that contains a number of sub-fields.
     Struct(Vec<Field>),
     Dictionary(Box<DataType>, Box<DataType>),
 }
 
+/// Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
+/// epoch (1970-01-01) in days or milliseconds.
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, 
PartialOrd, Ord)]
 pub enum DateUnit {
+    /// Days since the UNIX epoch.
     Day,
+    /// Milliseconds indicating UNIX time elapsed since the epoch (no
+    /// leap seconds), where the values are evenly divisible by 86400000.
     Millisecond,
 }
 
+/// An absolute length of time in seconds, milliseconds, microseconds or 
nanoseconds.
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, 
PartialOrd, Ord)]
 pub enum TimeUnit {
+    /// Time in seconds.
     Second,
+    /// Time in milliseconds.
     Millisecond,
+    /// Time in microseconds.
     Microsecond,
+    /// Time in nanoseconds.
     Nanosecond,
 }
 
+/// YEAR_MONTH or DAY_TIME interval in SQL style.
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, 
PartialOrd, Ord)]
 pub enum IntervalUnit {
+    /// Indicates the number of elapsed whole months, stored as 4-byte 
integers.
     YearMonth,
+    /// Indicates the number of elapsed days and milliseconds,
+    /// stored as 2 contiguous 32-bit integers (8-bytes in total).
     DayTime,
 }
 
@@ -477,22 +535,22 @@ where
         op: F,
     ) -> Self::Simd;
 
-    // SIMD version of equal
+    /// SIMD version of equal
     fn eq(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;
 
-    // SIMD version of not equal
+    /// SIMD version of not equal
     fn ne(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;
 
-    // SIMD version of less than
+    /// SIMD version of less than
     fn lt(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;
 
-    // SIMD version of less than or equal to
+    /// SIMD version of less than or equal to
     fn le(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;
 
-    // SIMD version of greater than
+    /// SIMD version of greater than
     fn gt(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;
 
-    // SIMD version of greater than or equal to
+    /// SIMD version of greater than or equal to
     fn ge(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;
 
     /// Writes a SIMD result back to a slice
@@ -638,8 +696,9 @@ impl ArrowTemporalType for Time64NanosecondType {}
 // impl ArrowTemporalType for IntervalYearMonthType {}
 // impl ArrowTemporalType for IntervalDayTimeType {}
 
-/// A timestamp type allows us to create array builders that take a timestamp
+/// A timestamp type allows us to create array builders that take a timestamp.
 pub trait ArrowTimestampType: ArrowTemporalType {
+    /// Returns the `TimeUnit` of this timestamp.
     fn get_time_unit() -> TimeUnit;
 }
 
@@ -1322,6 +1381,7 @@ impl fmt::Display for Schema {
     }
 }
 
+/// A reference-counted reference to a [`Schema`](crate::datatypes::Schema).
 pub type SchemaRef = Arc<Schema>;
 
 #[cfg(test)]
diff --git a/rust/arrow/src/record_batch.rs b/rust/arrow/src/record_batch.rs
index 8cfa225..9ae6505 100644
--- a/rust/arrow/src/record_batch.rs
+++ b/rust/arrow/src/record_batch.rs
@@ -15,11 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! According to the [Arrow Metadata 
Specification](https://arrow.apache.org/docs/metadata.html):
-//!
-//! > A record batch is a collection of top-level named, equal length Arrow 
arrays
-//! > (or vectors). If one of the arrays contains nested data, its child 
arrays are not
-//! > required to be the same length as the top-level arrays.
+//! A two-dimensional batch of column-oriented data with a defined
+//! [schema](crate::datatypes::Schema).
 
 use std::sync::Arc;
 
@@ -27,7 +24,18 @@ use crate::array::*;
 use crate::datatypes::*;
 use crate::error::{ArrowError, Result};
 
-/// A batch of column-oriented data
+/// A two-dimensional batch of column-oriented data with a defined
+/// [schema](crate::datatypes::Schema).
+///
+/// A `RecordBatch` is a two-dimensional dataset of a number of
+/// contiguous arrays, each the same length.
+/// A record batch has a schema which must match its arrays’
+/// datatypes.
+///
+/// Record batches are a convenient unit of work for various
+/// serialization and computation functions, possibly incremental.  
+/// See also [CSV reader](crate::csv::Reader) and
+/// [JSON reader](crate::json::Reader).
 #[derive(Clone)]
 pub struct RecordBatch {
     schema: Arc<Schema>,
@@ -35,12 +43,37 @@ pub struct RecordBatch {
 }
 
 impl RecordBatch {
-    /// Creates a `RecordBatch` from a schema and columns
+    /// Creates a `RecordBatch` from a schema and columns.
     ///
     /// Expects the following:
     ///  * the vec of columns to not be empty
-    ///  * the schema and column data types to have equal lengths and match
+    ///  * the schema and column data types to have equal lengths
+    ///    and match
     ///  * each array in columns to have the same length
+    ///
+    /// If the conditions are not met, an error is returned.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use std::sync::Arc;
+    /// use arrow::array::Int32Array;
+    /// use arrow::datatypes::{Schema, Field, DataType};
+    /// use arrow::record_batch::RecordBatch;
+    ///
+    /// # fn main() -> arrow::error::Result<()> {
+    /// let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+    /// let schema = Schema::new(vec![
+    ///     Field::new("id", DataType::Int32, false)
+    /// ]);
+    ///
+    /// let batch = RecordBatch::try_new(
+    ///     Arc::new(schema),
+    ///     vec![Arc::new(id_array)]
+    /// )?;
+    /// # Ok(())
+    /// # }
+    /// ```
     pub fn try_new(schema: Arc<Schema>, columns: Vec<ArrayRef>) -> 
Result<Self> {
         // check that there are some columns
         if columns.is_empty() {
@@ -74,27 +107,77 @@ impl RecordBatch {
         Ok(RecordBatch { schema, columns })
     }
 
-    /// Returns the schema of the record batch
+    /// Returns the [`Schema`](crate::datatypes::Schema) of the record batch.
     pub fn schema(&self) -> &Arc<Schema> {
         &self.schema
     }
 
-    /// Number of columns in the record batch
+    /// Returns the number of columns in the record batch.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use std::sync::Arc;
+    /// use arrow::array::Int32Array;
+    /// use arrow::datatypes::{Schema, Field, DataType};
+    /// use arrow::record_batch::RecordBatch;
+    ///
+    /// # fn main() -> arrow::error::Result<()> {
+    /// let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+    /// let schema = Schema::new(vec![
+    ///     Field::new("id", DataType::Int32, false)
+    /// ]);
+    ///
+    /// let batch = RecordBatch::try_new(Arc::new(schema), 
vec![Arc::new(id_array)])?;
+    ///
+    /// assert_eq!(batch.num_columns(), 1);
+    /// # Ok(())
+    /// # }
+    /// ```
     pub fn num_columns(&self) -> usize {
         self.columns.len()
     }
 
-    /// Number of rows in each column
+    /// Returns the number of rows in each column.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the `RecordBatch` contains no columns.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use std::sync::Arc;
+    /// use arrow::array::Int32Array;
+    /// use arrow::datatypes::{Schema, Field, DataType};
+    /// use arrow::record_batch::RecordBatch;
+    ///
+    /// # fn main() -> arrow::error::Result<()> {
+    /// let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+    /// let schema = Schema::new(vec![
+    ///     Field::new("id", DataType::Int32, false)
+    /// ]);
+    ///
+    /// let batch = RecordBatch::try_new(Arc::new(schema), 
vec![Arc::new(id_array)])?;
+    ///
+    /// assert_eq!(batch.num_rows(), 5);
+    /// # Ok(())
+    /// # }
+    /// ```
     pub fn num_rows(&self) -> usize {
         self.columns[0].data().len()
     }
 
-    /// Get a reference to a column's array by index
-    pub fn column(&self, i: usize) -> &ArrayRef {
-        &self.columns[i]
+    /// Get a reference to a column's array by index.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `index` is outside of `0..num_columns`.
+    pub fn column(&self, index: usize) -> &ArrayRef {
+        &self.columns[index]
     }
 
-    /// Get a reference to all columns
+    /// Get a reference to all columns in the record batch.
     pub fn columns(&self) -> &[ArrayRef] {
         &self.columns[..]
     }

Reply via email to