Jefffrey commented on code in PR #6758:
URL: https://github.com/apache/arrow-rs/pull/6758#discussion_r1922131199


##########
arrow-array/src/record_batch.rs:
##########
@@ -394,6 +395,97 @@ impl RecordBatch {
         )
     }
 
+    /// Normalize a semi-structured [`RecordBatch`] into a flat table.
+    ///
+    /// `separator`: Nested [`Field`]s will generate names separated by `separator`, e.g. for
+    /// separator= "." and the schema:
+    /// ```text
+    ///     "foo": StructArray<"bar": Utf8>
+    /// ```
+    /// will generate:
+    /// ```text
+    ///     "foo.bar": Utf8
+    /// ```
+    /// `max_level`: The maximum number of levels (depth of the `Schema` and `Columns`) to
+    /// normalize. If `0`, normalizes all levels.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use std::sync::Arc;
+    /// # use arrow_array::{ArrayRef, Int64Array, StringArray, StructArray, 
RecordBatch};
+    /// # use arrow_schema::{DataType, Field, Fields, Schema};
+    ///
+    /// let animals: ArrayRef = Arc::new(StringArray::from(vec!["Parrot", 
""]));
+    /// let n_legs: ArrayRef = Arc::new(Int64Array::from(vec![Some(2), 
Some(4)]));
+    ///
+    /// let animals_field = Arc::new(Field::new("animals", DataType::Utf8, 
true));
+    /// let n_legs_field = Arc::new(Field::new("n_legs", DataType::Int64, 
true));
+    ///
+    /// let a = Arc::new(StructArray::from(vec![
+    ///     (animals_field.clone(), Arc::new(animals.clone()) as ArrayRef),
+    ///     (n_legs_field.clone(), Arc::new(n_legs.clone()) as ArrayRef),
+    /// ]));
+    ///
+    /// let schema = Schema::new(vec![
+    ///     Field::new(
+    ///         "a",
+    ///         DataType::Struct(Fields::from(vec![animals_field, 
n_legs_field])),
+    ///         false,
+    ///     )
+    /// ]);
+    ///
+    /// let normalized = RecordBatch::try_new(Arc::new(schema), vec![a])
+    ///     .expect("valid conversion")
+    ///     .normalize(".", None)
+    ///     .expect("valid normalization");
+    ///
+    /// let expected = RecordBatch::try_from_iter_with_nullable(vec![
+    ///     ("a.animals", animals.clone(), true),
+    ///     ("a.n_legs", n_legs.clone(), true),
+    /// ])
+    /// .expect("valid conversion");
+    ///
+    /// assert_eq!(expected, normalized);
+    /// ```
+    pub fn normalize(&self, separator: &str, max_level: Option<usize>) -> 
Result<Self, ArrowError> {
+        let max_level = match max_level.unwrap_or(usize::MAX) {
+            0 => usize::MAX,
+            val => val,
+        };
+        let mut stack: Vec<(usize, &ArrayRef, Vec<&str>, &FieldRef)> = 
Vec::new();
+        for (c, f) in self.columns.iter().zip(self.schema.fields()).rev() {
+            let name_vec: Vec<&str> = vec![f.name()];
+            stack.push((0, c, name_vec, f));
+        }

Review Comment:
   ```suggestion
           let mut stack: Vec<(usize, &ArrayRef, Vec<&str>, &FieldRef)> = self
               .columns
               .iter()
               .zip(self.schema.fields())
               .rev()
               .map(|(c, f)| {
                   let name_vec: Vec<&str> = vec![f.name()];
                   (0, c, name_vec, f)
               })
               .collect();
   ```
   
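   Just to ensure the `Vec` is created with the right capacity from the start (`collect` can preallocate using the iterator's size hint, whereas repeated `push` calls may reallocate).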



##########
arrow-array/src/record_batch.rs:
##########
@@ -394,6 +395,97 @@ impl RecordBatch {
         )
     }
 
+    /// Normalize a semi-structured [`RecordBatch`] into a flat table.
+    ///
+    /// `separator`: Nested [`Field`]s will generate names separated by `separator`, e.g. for
+    /// separator= "." and the schema:
+    /// ```text
+    ///     "foo": StructArray<"bar": Utf8>
+    /// ```
+    /// will generate:
+    /// ```text
+    ///     "foo.bar": Utf8
+    /// ```
+    /// `max_level`: The maximum number of levels (depth of the `Schema` and `Columns`) to
+    /// normalize. If `0`, normalizes all levels.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use std::sync::Arc;
+    /// # use arrow_array::{ArrayRef, Int64Array, StringArray, StructArray, 
RecordBatch};
+    /// # use arrow_schema::{DataType, Field, Fields, Schema};
+    ///
+    /// let animals: ArrayRef = Arc::new(StringArray::from(vec!["Parrot", 
""]));
+    /// let n_legs: ArrayRef = Arc::new(Int64Array::from(vec![Some(2), 
Some(4)]));
+    ///
+    /// let animals_field = Arc::new(Field::new("animals", DataType::Utf8, 
true));
+    /// let n_legs_field = Arc::new(Field::new("n_legs", DataType::Int64, 
true));
+    ///
+    /// let a = Arc::new(StructArray::from(vec![
+    ///     (animals_field.clone(), Arc::new(animals.clone()) as ArrayRef),
+    ///     (n_legs_field.clone(), Arc::new(n_legs.clone()) as ArrayRef),
+    /// ]));
+    ///
+    /// let schema = Schema::new(vec![
+    ///     Field::new(
+    ///         "a",
+    ///         DataType::Struct(Fields::from(vec![animals_field, 
n_legs_field])),
+    ///         false,
+    ///     )
+    /// ]);
+    ///
+    /// let normalized = RecordBatch::try_new(Arc::new(schema), vec![a])
+    ///     .expect("valid conversion")
+    ///     .normalize(".", None)
+    ///     .expect("valid normalization");
+    ///
+    /// let expected = RecordBatch::try_from_iter_with_nullable(vec![
+    ///     ("a.animals", animals.clone(), true),
+    ///     ("a.n_legs", n_legs.clone(), true),
+    /// ])
+    /// .expect("valid conversion");
+    ///
+    /// assert_eq!(expected, normalized);
+    /// ```

Review Comment:
   ```suggestion
       /// Normalize a semi-structured [`RecordBatch`] into a flat table.
       ///
       /// Nested [`Field`]s will generate names separated by `separator`, up to a depth of `max_level`
       /// (unlimited if `None`).
       ///
       /// e.g. given a [`RecordBatch`] with schema:
       ///
       /// ```text
       ///     "foo": StructArray<"bar": Utf8>
       /// ```
       ///
       /// A separator of `"."` would generate a batch with the schema:
       ///
       /// ```text
       ///     "foo.bar": Utf8
       /// ```
       ///
       /// Note that giving a depth of `Some(0)` to `max_level` is the same as passing in `None`;
       /// it will be treated as unlimited.
       ///
       /// # Example
       ///
       /// ```
       /// # use std::sync::Arc;
       /// # use arrow_array::{ArrayRef, Int64Array, StringArray, StructArray, 
RecordBatch};
       /// # use arrow_schema::{DataType, Field, Fields, Schema};
       /// #
       /// let animals: ArrayRef = Arc::new(StringArray::from(vec!["Parrot", 
""]));
       /// let n_legs: ArrayRef = Arc::new(Int64Array::from(vec![Some(2), 
Some(4)]));
       ///
       /// let animals_field = Arc::new(Field::new("animals", DataType::Utf8, 
true));
       /// let n_legs_field = Arc::new(Field::new("n_legs", DataType::Int64, 
true));
       ///
       /// let a = Arc::new(StructArray::from(vec![
       ///     (animals_field.clone(), Arc::new(animals.clone()) as ArrayRef),
       ///     (n_legs_field.clone(), Arc::new(n_legs.clone()) as ArrayRef),
       /// ]));
       ///
       /// let schema = Schema::new(vec![
       ///     Field::new(
       ///         "a",
       ///         DataType::Struct(Fields::from(vec![animals_field, 
n_legs_field])),
       ///         false,
       ///     )
       /// ]);
       ///
       /// let normalized = RecordBatch::try_new(Arc::new(schema), vec![a])
       ///     .expect("valid conversion")
       ///     .normalize(".", None)
       ///     .expect("valid normalization");
       ///
       /// let expected = RecordBatch::try_from_iter_with_nullable(vec![
       ///     ("a.animals", animals.clone(), true),
       ///     ("a.n_legs", n_legs.clone(), true),
       /// ])
       /// .expect("valid conversion");
       ///
       /// assert_eq!(expected, normalized);
       /// ```
   ```
   
   Just some adjustments to the documentation to make it flow a bit better.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to