This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 773cf18e2ae feat: clarifying comments in struct_builder.rs #5494
(#5499)
773cf18e2ae is described below
commit 773cf18e2aeb8f299585496d4cf8a1a1f8262edb
Author: Istvan Fodor <[email protected]>
AuthorDate: Thu Mar 14 21:45:49 2024 -0500
feat: clarifying comments in struct_builder.rs #5494 (#5499)
* feat: clarifying comments in struct_builder.rs
Added clarifying comments to StructBuilder about creating collection columns
* fixed commented line, improved comments
* Removed redundant line in comment
* fixed slightly misleading comment
* moved example code to comment
* better comment
* fixed comment type
---
arrow-array/src/builder/struct_builder.rs | 77 +++++++++++++++++++++++++++++++
1 file changed, 77 insertions(+)
diff --git a/arrow-array/src/builder/struct_builder.rs
b/arrow-array/src/builder/struct_builder.rs
index eeb37cd8e66..1e2e402f745 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -25,6 +25,81 @@ use std::sync::Arc;
///
/// Note that callers should make sure that methods of all the child field
builders are
/// properly called to maintain the consistency of the data structure.
+///
+///
+/// Handling arrays with complex layouts, such as
`List<Struct<List<Struct>>>`, in Rust can be challenging due to its strong
typing system.
+/// To construct a collection builder ([`ListBuilder`], [`LargeListBuilder`],
or [`MapBuilder`]) using [`make_builder`], multiple calls are required. This
complexity arises from the recursive approach utilized by
[`StructBuilder::from_fields`].
+///
+/// Initially, [`StructBuilder::from_fields`] invokes [`make_builder`], which
returns a `Box<dyn ArrayBuilder>`. To obtain the specific collection builder,
one must first use [`StructBuilder::field_builder`] to get the collection
builder typed over `Box<dyn ArrayBuilder>` (for example,
`ListBuilder<Box<dyn ArrayBuilder>>`). Subsequently, the `values()` result of
that builder can be downcast to the desired builder type.
+///
+/// For example, when working with [`ListBuilder`], you would first call
[`StructBuilder::field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>`] and then
downcast the [`Box<dyn ArrayBuilder>`] to the specific [`StructBuilder`] you
need.
+///
+/// For a practical example see the code below:
+///
+/// ```rust
+/// use arrow_array::builder::{ArrayBuilder, ListBuilder, StringBuilder,
StructBuilder};
+/// use arrow_schema::{DataType, Field, Fields};
+/// use std::sync::Arc;
+///
+/// // This is an example column that has a List<Struct<List<Struct>>>
layout
+/// let mut example_col = ListBuilder::new(StructBuilder::from_fields(
+/// vec![Field::new(
+/// "value_list",
+/// DataType::List(Arc::new(Field::new(
+/// "item",
+/// DataType::Struct(Fields::from(vec![
+/// Field::new("key", DataType::Utf8, true),
+/// Field::new("value", DataType::Utf8, true),
+/// ])), //In this example we are trying to get to this builder
and insert key/value pairs
+/// true,
+/// ))),
+/// true,
+/// )],
+/// 0,
+/// ));
+///
+/// // We can obtain the StructBuilder without issues, because example_col
was created with StructBuilder
+/// let col_struct_builder: &mut StructBuilder = example_col.values();
+///
+/// // We can't obtain the ListBuilder<StructBuilder> with the expected
generic types, because under the hood
+/// // the StructBuilder was returned as a Box<dyn ArrayBuilder> and passed
as such to the ListBuilder constructor
+///
+/// // This panics at runtime, even though we know that the builder is a
ListBuilder<StructBuilder>.
+/// // let sb = col_struct_builder
+/// // .field_builder::<ListBuilder<StructBuilder>>(0)
+/// // .as_mut()
+/// // .unwrap();
+///
+/// //To keep in line with Rust's strong typing, we fetch a
ListBuilder<Box<dyn ArrayBuilder>> from the column StructBuilder first...
+/// let mut list_builder_option =
+/// col_struct_builder.field_builder::<ListBuilder<Box<dyn
ArrayBuilder>>>(0);
+///
+/// let list_builder = list_builder_option.as_mut().unwrap();
+///
+/// // ... and then downcast the key/value pair values to a StructBuilder
+/// let struct_builder = list_builder
+/// .values()
+/// .as_any_mut()
+/// .downcast_mut::<StructBuilder>()
+/// .unwrap();
+///
+/// // We can now append values to the StructBuilder
+/// let key_builder =
struct_builder.field_builder::<StringBuilder>(0).unwrap();
+/// key_builder.append_value("my key");
+///
+/// let value_builder =
struct_builder.field_builder::<StringBuilder>(1).unwrap();
+/// value_builder.append_value("my value");
+///
+/// struct_builder.append(true);
+/// list_builder.append(true);
+/// col_struct_builder.append(true);
+/// example_col.append(true);
+///
+/// let array = example_col.finish();
+///
+/// println!("My array: {:?}", array);
+/// ```
+///
pub struct StructBuilder {
fields: Fields,
field_builders: Vec<Box<dyn ArrayBuilder>>,
@@ -88,6 +163,8 @@ impl ArrayBuilder for StructBuilder {
/// Returns a builder with capacity `capacity` that corresponds to the
datatype `DataType`
/// This function is useful to construct arrays from arbitrary vectors with
known/expected
/// schema.
+///
+/// See comments on StructBuilder on how to retrieve collection builders built
by make_builder.
pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn
ArrayBuilder> {
use crate::builder::*;
match datatype {