ngli-me commented on code in PR #6758:
URL: https://github.com/apache/arrow-rs/pull/6758#discussion_r1899944152
##########
arrow-array/src/record_batch.rs:
##########
@@ -394,6 +396,56 @@ impl RecordBatch {
)
}
+ /// Normalize a semi-structured [`RecordBatch`] into a flat table.
+ ///
+ /// If max_level is 0, normalizes all levels.
+ pub fn normalize(&self, separator: &str, mut max_level: usize) ->
Result<Self, ArrowError> {
+ if max_level == 0 {
+ max_level = usize::MAX;
+ }
+ if self.num_rows() == 0 {
+ // No data, only need to normalize the schema
+ return Ok(Self::new_empty(Arc::new(
+ self.schema.normalize(separator, max_level)?,
+ )));
+ }
+ let mut queue: VecDeque<(usize, (ArrayRef, FieldRef))> =
VecDeque::new();
+
+ for (c, f) in self.columns.iter().zip(self.schema.fields()) {
+ queue.push_back((0, ((*c).clone(), (*f).clone())));
+ }
+
+ let mut columns: Vec<ArrayRef> = Vec::new();
+ let mut fields: Vec<FieldRef> = Vec::new();
+
+ while let Some((depth, (c, f))) = queue.pop_front() {
+ if depth < max_level {
+ match f.data_type() {
+ DataType::Struct(ff) => {
+ // Need to zip these in reverse to maintain original
order
+ for (cff, fff) in
c.as_struct().columns().iter().zip(ff.into_iter()).rev() {
+ let new_key = format!("{}{}{}", f.name(),
separator, fff.name());
Review Comment:
Added a (hopefully) better approach for this that concatenates the
`Vec<&str>` once the field is done being processed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]