alamb commented on a change in pull request #1776:
URL: https://github.com/apache/arrow-datafusion/pull/1776#discussion_r801931475
##########
File path: datafusion/src/physical_plan/mod.rs
##########
@@ -142,29 +142,64 @@ pub trait ExecutionPlan: Debug + Send + Sync {
/// Specifies the output partitioning scheme of this plan
fn output_partitioning(&self) -> Partitioning;
+ /// If the output of this operator is sorted, returns `Some(keys)`
+ /// with the description of how it was sorted.
+ ///
+ /// For example, Sort, (obviously) produces sorted output as does
+ /// SortPreservingMergeStream. Less obviously `Projection`
+ /// produces sorted output if its input was sorted as it does not
+ /// reorder the input rows
+ fn output_ordering(&self) -> Option<&[PhysicalSortExpr]>;
+
/// Specifies the data distribution requirements of all the children for this operator
fn required_child_distribution(&self) -> Distribution {
Distribution::UnspecifiedDistribution
}
- /// Returns `true` if the direct children of this `ExecutionPlan` should be repartitioned
- /// to introduce greater concurrency to the plan
+ /// Returns `true` if this operator relies on its inputs being
+ /// produced in a certain order (for example that they are sorted a particular way) for correctness.
///
- /// The default implementation returns `true` unless `Self::required_child_distribution`
- /// returns `Distribution::SinglePartition`
+ /// If `true` is returned, DataFusion will not apply certain
+ /// optimizations which might reorder the inputs (such as
+ /// repartitioning to increase concurrency).
///
- /// Operators that do not benefit from additional partitioning may want to return `false`
- fn should_repartition_children(&self) -> bool {
- !matches!(
- self.required_child_distribution(),
- Distribution::SinglePartition
- )
+ /// The default implementation returns `false`
+ fn relies_on_input_order(&self) -> bool {
+ false
Review comment:
done in a809bbddae
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]