andygrove commented on a change in pull request #543:
URL: https://github.com/apache/arrow-datafusion/pull/543#discussion_r659184184
##########
File path: ballista/rust/core/src/execution_plans/query_stage.rs
##########
@@ -150,32 +159,150 @@ impl ExecutionPlan for QueryStageExec {
stats
);
- let schema = Arc::new(Schema::new(vec![
- Field::new("path", DataType::Utf8, false),
- stats.arrow_struct_repr(),
- ]));
+ let schema = result_schema();
// build result set with summary of the partition execution
status
- let mut c0 = StringBuilder::new(1);
- c0.append_value(&path).unwrap();
- let path: ArrayRef = Arc::new(c0.finish());
+ let mut part_builder = UInt32Builder::new(1);
+ part_builder.append_value(partition as u32)?;
+ let part: ArrayRef = Arc::new(part_builder.finish());
+
+ let mut path_builder = StringBuilder::new(1);
+ path_builder.append_value(&path)?;
+ let path: ArrayRef = Arc::new(path_builder.finish());
let stats: ArrayRef = stats
.to_arrow_arrayref()
.map_err(|e| DataFusionError::Execution(format!("{:?}",
e)))?;
- let batch = RecordBatch::try_new(schema.clone(), vec![path,
stats])
+ let batch = RecordBatch::try_new(schema.clone(), vec![part,
path, stats])
.map_err(DataFusionError::ArrowError)?;
Ok(Box::pin(MemoryStream::try_new(vec![batch], schema, None)?))
}
- Some(Partitioning::Hash(_, _)) => {
- //TODO re-use code from RepartitionExec to split each batch
into
- // partitions and write to one IPC file per partition
- // See https://github.com/apache/arrow-datafusion/issues/456
- Err(DataFusionError::NotImplemented(
- "Shuffle partitioning not implemented yet".to_owned(),
- ))
+ Some(Partitioning::Hash(exprs, n)) => {
+ let num_output_partitions = *n;
+
+ // we won't necessarily produce output for every possible
partition, so we
+ // create writers on demand
+ let mut writers: Vec<Option<Arc<Mutex<ShuffleWriter>>>> =
vec![];
Review comment:
I tried changing this but ran into ownership issues. I'll go ahead and
merge, and perhaps someone can help me fix this in a follow-up PR.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]