ngli-me commented on code in PR #6758:
URL: https://github.com/apache/arrow-rs/pull/6758#discussion_r1892195224
##########
arrow-array/src/record_batch.rs:
##########
@@ -394,6 +396,56 @@ impl RecordBatch {
)
}
+ /// Normalize a semi-structured [`RecordBatch`] into a flat table.
+ ///
+ /// If max_level is 0, normalizes all levels.
+ pub fn normalize(&self, separator: &str, mut max_level: usize) ->
Result<Self, ArrowError> {
+ if max_level == 0 {
+ max_level = usize::MAX;
+ }
+ if self.num_rows() == 0 {
+ // No data, only need to normalize the schema
+ return Ok(Self::new_empty(Arc::new(
+ self.schema.normalize(separator, max_level)?,
+ )));
+ }
+ let mut queue: VecDeque<(usize, (ArrayRef, FieldRef))> =
VecDeque::new();
+
+ for (c, f) in self.columns.iter().zip(self.schema.fields()) {
+ queue.push_back((0, ((*c).clone(), (*f).clone())));
+ }
+
+ let mut columns: Vec<ArrayRef> = Vec::new();
+ let mut fields: Vec<FieldRef> = Vec::new();
+
+ while let Some((depth, (c, f))) = queue.pop_front() {
+ if depth < max_level {
+ match f.data_type() {
+ DataType::Struct(ff) => {
+ // Need to zip these in reverse to maintain original
order
+ for (cff, fff) in
c.as_struct().columns().iter().zip(ff.into_iter()).rev() {
+ let new_key = format!("{}{}{}", f.name(),
separator, fff.name());
Review Comment:
I think this is a good point; this is definitely not my favorite way to do
this. I'll have to do some testing and think about it some more, but it may be
better to construct the queue with the components of each `Field`, then go
through and construct all of the `Field`s at the very end.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org