Jefffrey commented on code in PR #5468: URL: https://github.com/apache/arrow-datafusion/pull/5468#discussion_r1125562538
########## datafusion/core/src/dataframe.rs: ########## @@ -344,24 +344,42 @@ impl DataFrame { .collect::<Vec<_>>(); describe_schemas.insert(0, Field::new("describe", DataType::Utf8, false)); + //count aggregation + let cnt = self.clone().aggregate( + vec![], + original_schema_fields + .clone() + .map(|f| count(col(f.name()))) + .collect::<Vec<_>>(), + )?; + // The optimization of AggregateStatistics will rewrite the physical plan + // for the count function and ignore alias functions, + // as shown in https://github.com/apache/arrow-datafusion/issues/5444. + // This logic should be removed when #5444 is fixed. + let cnt = cnt + .clone() + .select( + cnt.schema() + .fields() + .iter() + .zip(original_schema_fields.clone()) + .map(|(count_field, orgin_field)| { + col(count_field.name()).alias(orgin_field.name()) + }) + .collect::<Vec<_>>(), + ) + .unwrap(); Review Comment: ```suggestion )?; ``` Instead of unwrap ########## datafusion/core/src/dataframe.rs: ########## @@ -344,24 +344,42 @@ impl DataFrame { .collect::<Vec<_>>(); describe_schemas.insert(0, Field::new("describe", DataType::Utf8, false)); + //count aggregation + let cnt = self.clone().aggregate( + vec![], + original_schema_fields + .clone() + .map(|f| count(col(f.name()))) Review Comment: ```suggestion .map(|f| count(col(f.name())).alias(f.name())) ``` Maybe keep old way to make reverting easier? Since workaround should still work with this ########## datafusion/core/src/dataframe.rs: ########## @@ -344,24 +344,42 @@ impl DataFrame { .collect::<Vec<_>>(); describe_schemas.insert(0, Field::new("describe", DataType::Utf8, false)); + //count aggregation + let cnt = self.clone().aggregate( + vec![], + original_schema_fields + .clone() + .map(|f| count(col(f.name()))) + .collect::<Vec<_>>(), + )?; + // The optimization of AggregateStatistics will rewrite the physical plan + // for the count function and ignore alias functions, + // as shown in https://github.com/apache/arrow-datafusion/issues/5444. + // This logic should be removed when #5444 is fixed. + let cnt = cnt + .clone() + .select( + cnt.schema() + .fields() + .iter() + .zip(original_schema_fields.clone()) + .map(|(count_field, orgin_field)| { + col(count_field.name()).alias(orgin_field.name()) + }) + .collect::<Vec<_>>(), + ) + .unwrap(); + //should be removed when #5444 is fixed + //collect recordBatch let describe_record_batch = vec![ // count aggregation - self.clone() - .aggregate( - vec![], - fields_iter - .clone() - .map(|f| count(col(f.name())).alias(f.name())) - .collect::<Vec<_>>(), - )? - .collect() - .await?, + cnt.collect().await.unwrap(), Review Comment: ```suggestion cnt.collect().await?, ``` Instead of unwrap -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org