alamb commented on code in PR #7566:
URL: https://github.com/apache/arrow-datafusion/pull/7566#discussion_r1327949263
##########
datafusion/core/src/physical_optimizer/enforce_distribution.rs:
##########
@@ -804,36 +803,21 @@ fn try_reorder(
} else if !equivalence_properties.classes().is_empty() {
normalized_expected = expected
.iter()
- .map(|e| {
- normalize_expr_with_equivalence_properties(
- e.clone(),
- equivalence_properties.classes(),
- )
- })
+ .map(|e| equivalence_properties.normalize_expr(e.clone()))
Review Comment:
this looks much nicer !
##########
datafusion/physical-expr/src/equivalence.rs:
##########
@@ -131,17 +210,120 @@ impl<T: Eq + Clone + Hash> EquivalenceProperties<T> {
/// where both `a ASC` and `b DESC` can describe the table ordering. With
/// `OrderingEquivalenceProperties`, we can keep track of these equivalences
/// and treat `a ASC` and `b DESC` as the same ordering requirement.
-pub type OrderingEquivalenceProperties = EquivalenceProperties<LexOrdering>;
+#[derive(Debug, Clone)]
+pub struct OrderingEquivalenceProperties {
+ oeq_class: Option<OrderingEquivalentClass>,
+ /// Keeps track of expressions that have constant value.
+ constants: Vec<Arc<dyn PhysicalExpr>>,
+ schema: SchemaRef,
+}
impl OrderingEquivalenceProperties {
+ /// Create an empty `OrderingEquivalenceProperties`
+ pub fn new(schema: SchemaRef) -> Self {
+ Self {
+ oeq_class: None,
+ constants: vec![],
+ schema,
+ }
+ }
+
+ /// Extends `OrderingEquivalenceProperties` by adding ordering inside the
`other`
+ /// to the `self.oeq_class`.
+ pub fn extend(&mut self, other: Option<OrderingEquivalentClass>) {
+ if let Some(other) = other {
+ if let Some(class) = &mut self.oeq_class {
+ class.others.insert(other.head);
+ class.others.extend(other.others);
+ } else {
+ self.oeq_class = Some(other);
+ }
+ }
+ }
+
+ pub fn oeq_class(&self) -> Option<&OrderingEquivalentClass> {
+ self.oeq_class.as_ref()
+ }
+
+ /// Adds new equal conditions into the EquivalenceProperties. New equal
+ /// conditions usually come from equality predicates in a join/filter.
+ pub fn add_equal_conditions(&mut self, new_conditions: (&LexOrdering,
&LexOrdering)) {
+ if let Some(class) = &mut self.oeq_class {
+ class.insert(new_conditions.0.clone());
+ class.insert(new_conditions.1.clone());
+ } else {
+ let head = new_conditions.0.clone();
+ let others = vec![new_conditions.1.clone()];
+ self.oeq_class = Some(OrderingEquivalentClass::new(head, others))
+ }
+ }
+
+ /// Add physical expression that have constant value to the
`self.constants`
+ pub fn with_constants(mut self, constants: Vec<Arc<dyn PhysicalExpr>>) ->
Self {
+ constants.into_iter().for_each(|constant| {
+ if !physical_exprs_contains(&self.constants, &constant) {
+ self.constants.push(constant);
+ }
+ });
+ self
+ }
+
+ pub fn schema(&self) -> SchemaRef {
+ self.schema.clone()
+ }
+
+ /// This function normalizes `sort_reqs` by
+ /// - removing expressions that have constant value from requirement
+ /// - replacing sections that are in the `self.oeq_class.others` with
`self.oeq_class.head`
+ /// - removing sections that satisfies global ordering that are in the
post fix of requirement
+ pub fn normalize_sort_requirements(
+ &self,
+ sort_reqs: &[PhysicalSortRequirement],
+ ) -> Vec<PhysicalSortRequirement> {
+ let normalized_sort_reqs =
+ prune_sort_reqs_with_constants(sort_reqs, &self.constants);
+ let mut normalized_sort_reqs = collapse_lex_req(normalized_sort_reqs);
+ if let Some(oeq_class) = &self.oeq_class {
+ for item in oeq_class.others() {
+ let item = PhysicalSortRequirement::from_sort_exprs(item);
+ let item = prune_sort_reqs_with_constants(&item,
&self.constants);
+ let ranges = get_compatible_ranges(&normalized_sort_reqs,
&item);
+ let mut offset: i64 = 0;
Review Comment:
why use an `i64` here? It seems like if `offset` was always a `usize` the
math below would be much simpler as the casting `as usize` and `as i64` could
be avoided?
##########
datafusion/core/src/physical_plan/filter.rs:
##########
@@ -197,7 +210,14 @@ impl ExecutionPlan for FilterExec {
let input_stats = self.input.statistics();
let input_column_stats = match input_stats.column_statistics {
Some(stats) => stats,
- None => return Statistics::default(),
+ None => self
Review Comment:
this seems like it is more general than just for `FilterExec` -- shouldn't
all ExecutionPlan's return statistics that have unbounded columns in the
absence of other information? Maybe we should change `Statistics::default()` to
do this 🤔
##########
datafusion/common/src/stats.rs:
##########
@@ -70,3 +72,24 @@ pub struct ColumnStatistics {
/// Number of distinct values
pub distinct_count: Option<usize>,
}
+
+impl ColumnStatistics {
+ pub fn is_singleton(&self) -> bool {
Review Comment:
I recomment a docstring explaning what a singleton is (e.g. that the column
contains a single, non null value)
##########
datafusion/core/src/physical_plan/filter.rs:
##########
@@ -197,7 +210,14 @@ impl ExecutionPlan for FilterExec {
let input_stats = self.input.statistics();
let input_column_stats = match input_stats.column_statistics {
Some(stats) => stats,
- None => return Statistics::default(),
+ None => self
Review Comment:
this seems like it is more general than just for `FilterExec` -- shouldn't
all ExecutionPlan's return statistics that have unbounded columns in the
absence of other information? Maybe we should change `Statistics::default()` to
do this 🤔
##########
datafusion/core/src/physical_plan/joins/utils.rs:
##########
@@ -324,66 +322,24 @@ pub fn cross_join_equivalence_properties(
///
/// This way; once we normalize an expression according to equivalence
properties,
/// it can thereafter safely be used for ordering equivalence normalization.
-fn get_updated_right_ordering_equivalence_properties(
+fn get_updated_right_ordering_equivalent_class(
join_type: &JoinType,
- right_oeq_classes: &[OrderingEquivalentClass],
+ right_oeq_class: &OrderingEquivalentClass,
left_columns_len: usize,
join_eq_properties: &EquivalenceProperties,
-) -> Result<Vec<OrderingEquivalentClass>> {
- let updated_oeqs = match join_type {
+) -> Result<OrderingEquivalentClass> {
+ match join_type {
// In these modes, indices of the right schema should be offset by
// the left table size.
JoinType::Inner | JoinType::Left | JoinType::Full | JoinType::Right =>
{
- add_offset_to_ordering_equivalence_classes(
- right_oeq_classes,
- left_columns_len,
- )?
+ let right_oeq_class =
right_oeq_class.add_offset(left_columns_len)?;
Review Comment:
I am a big fan of putting the logic into `OrderingEquivalentClass` rather
than free functions that take an `OrderingEquivalentClass`
##########
datafusion/physical-expr/src/equivalence.rs:
##########
@@ -37,12 +41,12 @@ use std::sync::Arc;
/// 1. Equality conditions (like `A=B`), when `T` = [`Column`]
/// 2. Ordering (like `A ASC = B ASC`), when `T` = [`PhysicalSortExpr`]
#[derive(Debug, Clone)]
-pub struct EquivalenceProperties<T = Column> {
- classes: Vec<EquivalentClass<T>>,
+pub struct EquivalenceProperties {
Review Comment:
❤️
##########
datafusion/physical-expr/src/analysis.rs:
##########
@@ -191,12 +191,15 @@ fn shrink_boundaries(
})?;
let final_result = graph.get_interval(*root_index);
+ // If during selectivity calculation we encounter an error, use 1.0 as
cardinality estimate
+ // safest estimate(e.q largest possible value).
let selectivity = calculate_selectivity(
&final_result.lower.value,
&final_result.upper.value,
&target_boundaries,
&initial_boundaries,
- )?;
+ )
+ .unwrap_or(1.0);
Review Comment:
I think this is an example of
https://github.com/apache/arrow-datafusion/issues/7552 -- ignoring errors in
the normal path results in potential slowdowns of planning
can we please fix the underlying issue here rather than ignoring it? Maybe
we need to change the interval analysis API to return `Result<Option<>>` rather
than just `Result`
In general ignoring the errors I think is just papering over real problems
##########
datafusion/core/src/physical_plan/filter.rs:
##########
@@ -153,7 +154,19 @@ impl ExecutionPlan for FilterExec {
}
fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties
{
- self.input.ordering_equivalence_properties()
+ let stats = self.statistics();
+ // Add the columns that have only one value (singleton) after
filtering to constants.
Review Comment:
Why is there a a difference between `OrderingEquivalenceProperties` and
`EquivalenceProperties` -- it seems like using statistics based equivalence as
well as predicate based equivalence would be relevant to both
In other words, if the filter has a predicate like `column = 5` shouldn't
`column` be added to list of constants even if the `column` had more than one
value in the statistics?
##########
datafusion/sqllogictest/test_files/select.slt:
##########
@@ -888,6 +888,112 @@ physical_plan
ProjectionExec: expr=[a@0 as a, b@1 as b, 2 as Int64(2)]
--CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b],
output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true
+# source is ordered by a,b,c
+# when filter result is constant for column a
+# ordering b, c is still satisfied. Final plan shouldn't have
Review Comment:
👍
##########
datafusion/sqllogictest/test_files/select.slt:
##########
@@ -888,6 +888,112 @@ physical_plan
ProjectionExec: expr=[a@0 as a, b@1 as b, 2 as Int64(2)]
--CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b],
output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true
+# source is ordered by a,b,c
+# when filter result is constant for column a
+# ordering b, c is still satisfied. Final plan shouldn't have
+# SortExec.
+query TT
+EXPLAIN SELECT *
+FROM annotated_data_finite2
+WHERE a=0
+ORDER BY b, c;
+----
+logical_plan
+Sort: annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC
NULLS LAST
+--Filter: annotated_data_finite2.a = Int32(0)
+----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d],
partial_filters=[annotated_data_finite2.a = Int32(0)]
+physical_plan
+SortPreservingMergeExec: [b@2 ASC NULLS LAST,c@3 ASC NULLS LAST]
+--CoalesceBatchesExec: target_batch_size=8192
+----FilterExec: a@1 = 0
+------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+--------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC
NULLS LAST], has_header=true
+
+# source is ordered by a,b,c
+# when filter result is constant for column a and b
+# ordering c is still satisfied. Final plan shouldn't have
+# SortExec.
+query TT
+EXPLAIN SELECT *
+FROM annotated_data_finite2
+WHERE a=0 and b=0
+ORDER BY c;
+----
+logical_plan
+Sort: annotated_data_finite2.c ASC NULLS LAST
+--Filter: annotated_data_finite2.a = Int32(0) AND annotated_data_finite2.b =
Int32(0)
+----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d],
partial_filters=[annotated_data_finite2.a = Int32(0), annotated_data_finite2.b
= Int32(0)]
+physical_plan
+SortPreservingMergeExec: [c@3 ASC NULLS LAST]
+--CoalesceBatchesExec: target_batch_size=8192
+----FilterExec: a@1 = 0 AND b@2 = 0
+------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+--------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC
NULLS LAST], has_header=true
+
+# source is ordered by a,b,c
+# when filter result is constant for column a and b
+# ordering b, c is still satisfied. Final plan shouldn't have
+# SortExec.
+query TT
+EXPLAIN SELECT *
+FROM annotated_data_finite2
+WHERE a=0 and b=0
+ORDER BY b, c;
+----
+logical_plan
+Sort: annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC
NULLS LAST
+--Filter: annotated_data_finite2.a = Int32(0) AND annotated_data_finite2.b =
Int32(0)
+----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d],
partial_filters=[annotated_data_finite2.a = Int32(0), annotated_data_finite2.b
= Int32(0)]
+physical_plan
+SortPreservingMergeExec: [b@2 ASC NULLS LAST,c@3 ASC NULLS LAST]
+--CoalesceBatchesExec: target_batch_size=8192
+----FilterExec: a@1 = 0 AND b@2 = 0
+------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+--------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC
NULLS LAST], has_header=true
+
+# source is ordered by a,b,c
+# when filter result is constant for column a and b
+# ordering a, b, c is still satisfied. Final plan shouldn't have
+# SortExec.
+query TT
+EXPLAIN SELECT *
+FROM annotated_data_finite2
+WHERE a=0 and b=0
+ORDER BY a, b, c;
+----
+logical_plan
+Sort: annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC
NULLS LAST, annotated_data_finite2.c ASC NULLS LAST
+--Filter: annotated_data_finite2.a = Int32(0) AND annotated_data_finite2.b =
Int32(0)
+----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d],
partial_filters=[annotated_data_finite2.a = Int32(0), annotated_data_finite2.b
= Int32(0)]
+physical_plan
+SortPreservingMergeExec: [a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS
LAST]
+--CoalesceBatchesExec: target_batch_size=8192
+----FilterExec: a@1 = 0 AND b@2 = 0
+------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+--------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC
NULLS LAST], has_header=true
+
+# source is ordered by a,b,c
+# when filter result is when filter contains or
+# column a, and b may not be constant. Hence final plan
+# should contain SortExec
+query TT
+EXPLAIN SELECT *
+FROM annotated_data_finite2
+WHERE a=0 or b=0
+ORDER BY c;
+----
+logical_plan
+Sort: annotated_data_finite2.c ASC NULLS LAST
+--Filter: annotated_data_finite2.a = Int32(0) OR annotated_data_finite2.b =
Int32(0)
+----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d],
partial_filters=[annotated_data_finite2.a = Int32(0) OR
annotated_data_finite2.b = Int32(0)]
+physical_plan
+SortPreservingMergeExec: [c@3 ASC NULLS LAST]
+--SortExec: expr=[c@3 ASC NULLS LAST]
Review Comment:
👍
##########
datafusion/sqllogictest/test_files/subquery.slt:
##########
@@ -284,19 +284,20 @@ Projection: t1.t1_id, __scalar_sq_1.SUM(t2.t2_int) AS
t2_sum
------------TableScan: t2 projection=[t2_id, t2_int]
physical_plan
ProjectionExec: expr=[t1_id@0 as t1_id, SUM(t2.t2_int)@1 as t2_sum]
---CoalesceBatchesExec: target_batch_size=8192
-----HashJoinExec: mode=Partitioned, join_type=Left, on=[(t1_id@0, t2_id@1)]
-------CoalesceBatchesExec: target_batch_size=8192
---------RepartitionExec: partitioning=Hash([t1_id@0], 4), input_partitions=4
-----------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
-------ProjectionExec: expr=[SUM(t2.t2_int)@1 as SUM(t2.t2_int), t2_id@0 as
t2_id]
+--ProjectionExec: expr=[t1_id@2 as t1_id, SUM(t2.t2_int)@0 as SUM(t2.t2_int),
t2_id@1 as t2_id]
+----CoalesceBatchesExec: target_batch_size=8192
+------HashJoinExec: mode=Partitioned, join_type=Right, on=[(t2_id@1, t1_id@0)]
Review Comment:
Is the difference in this plan that the inputs switched order? Do you know
why they did?
##########
datafusion/sqllogictest/test_files/window.slt:
##########
@@ -2342,11 +2342,10 @@ Limit: skip=0, fetch=5
----------TableScan: aggregate_test_100 projection=[c9]
physical_plan
GlobalLimitExec: skip=0, fetch=5
---SortExec: fetch=5, expr=[rn1@1 ASC NULLS LAST,c9@0 ASC NULLS LAST]
-----ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY
[aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND
CURRENT ROW@1 as rn1]
-------BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9
DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field {
name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable:
false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame
{ units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }],
mode=[Sorted]
---------SortExec: expr=[c9@0 DESC]
-----------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9],
has_header=true
+--ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY
[aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND
CURRENT ROW@1 as rn1]
Review Comment:
Since the output is ordered by `rn1` given it comes from the window function
`row_number()` I think this plan is both correct as well as better.
##########
datafusion/physical-expr/src/equivalence.rs:
##########
@@ -37,12 +41,12 @@ use std::sync::Arc;
/// 1. Equality conditions (like `A=B`), when `T` = [`Column`]
/// 2. Ordering (like `A ASC = B ASC`), when `T` = [`PhysicalSortExpr`]
#[derive(Debug, Clone)]
-pub struct EquivalenceProperties<T = Column> {
- classes: Vec<EquivalentClass<T>>,
+pub struct EquivalenceProperties {
Review Comment:
❤️
##########
datafusion/sqllogictest/test_files/select.slt:
##########
@@ -888,6 +888,112 @@ physical_plan
ProjectionExec: expr=[a@0 as a, b@1 as b, 2 as Int64(2)]
--CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b],
output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true
+# source is ordered by a,b,c
+# when filter result is constant for column a
+# ordering b, c is still satisfied. Final plan shouldn't have
+# SortExec.
+query TT
+EXPLAIN SELECT *
+FROM annotated_data_finite2
+WHERE a=0
+ORDER BY b, c;
+----
+logical_plan
+Sort: annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC
NULLS LAST
+--Filter: annotated_data_finite2.a = Int32(0)
+----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d],
partial_filters=[annotated_data_finite2.a = Int32(0)]
+physical_plan
+SortPreservingMergeExec: [b@2 ASC NULLS LAST,c@3 ASC NULLS LAST]
+--CoalesceBatchesExec: target_batch_size=8192
+----FilterExec: a@1 = 0
+------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+--------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC
NULLS LAST], has_header=true
+
+# source is ordered by a,b,c
+# when filter result is constant for column a and b
+# ordering c is still satisfied. Final plan shouldn't have
+# SortExec.
+query TT
+EXPLAIN SELECT *
+FROM annotated_data_finite2
+WHERE a=0 and b=0
+ORDER BY c;
+----
+logical_plan
+Sort: annotated_data_finite2.c ASC NULLS LAST
+--Filter: annotated_data_finite2.a = Int32(0) AND annotated_data_finite2.b =
Int32(0)
+----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d],
partial_filters=[annotated_data_finite2.a = Int32(0), annotated_data_finite2.b
= Int32(0)]
+physical_plan
+SortPreservingMergeExec: [c@3 ASC NULLS LAST]
+--CoalesceBatchesExec: target_batch_size=8192
+----FilterExec: a@1 = 0 AND b@2 = 0
+------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+--------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC
NULLS LAST], has_header=true
+
+# source is ordered by a,b,c
+# when filter result is constant for column a and b
+# ordering b, c is still satisfied. Final plan shouldn't have
+# SortExec.
+query TT
+EXPLAIN SELECT *
+FROM annotated_data_finite2
+WHERE a=0 and b=0
+ORDER BY b, c;
+----
+logical_plan
+Sort: annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC
NULLS LAST
+--Filter: annotated_data_finite2.a = Int32(0) AND annotated_data_finite2.b =
Int32(0)
+----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d],
partial_filters=[annotated_data_finite2.a = Int32(0), annotated_data_finite2.b
= Int32(0)]
+physical_plan
+SortPreservingMergeExec: [b@2 ASC NULLS LAST,c@3 ASC NULLS LAST]
+--CoalesceBatchesExec: target_batch_size=8192
+----FilterExec: a@1 = 0 AND b@2 = 0
+------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+--------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC
NULLS LAST], has_header=true
+
+# source is ordered by a,b,c
+# when filter result is constant for column a and b
+# ordering a, b, c is still satisfied. Final plan shouldn't have
+# SortExec.
+query TT
+EXPLAIN SELECT *
+FROM annotated_data_finite2
+WHERE a=0 and b=0
+ORDER BY a, b, c;
+----
+logical_plan
+Sort: annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC
NULLS LAST, annotated_data_finite2.c ASC NULLS LAST
+--Filter: annotated_data_finite2.a = Int32(0) AND annotated_data_finite2.b =
Int32(0)
+----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d],
partial_filters=[annotated_data_finite2.a = Int32(0), annotated_data_finite2.b
= Int32(0)]
+physical_plan
+SortPreservingMergeExec: [a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS
LAST]
+--CoalesceBatchesExec: target_batch_size=8192
+----FilterExec: a@1 = 0 AND b@2 = 0
+------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+--------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC
NULLS LAST], has_header=true
+
+# source is ordered by a,b,c
+# when filter result is when filter contains or
+# column a, and b may not be constant. Hence final plan
+# should contain SortExec
+query TT
+EXPLAIN SELECT *
+FROM annotated_data_finite2
+WHERE a=0 or b=0
+ORDER BY c;
+----
+logical_plan
+Sort: annotated_data_finite2.c ASC NULLS LAST
+--Filter: annotated_data_finite2.a = Int32(0) OR annotated_data_finite2.b =
Int32(0)
+----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d],
partial_filters=[annotated_data_finite2.a = Int32(0) OR
annotated_data_finite2.b = Int32(0)]
+physical_plan
+SortPreservingMergeExec: [c@3 ASC NULLS LAST]
+--SortExec: expr=[c@3 ASC NULLS LAST]
Review Comment:
👍
##########
datafusion/sqllogictest/test_files/select.slt:
##########
@@ -888,6 +888,112 @@ physical_plan
ProjectionExec: expr=[a@0 as a, b@1 as b, 2 as Int64(2)]
--CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b],
output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true
+# source is ordered by a,b,c
+# when filter result is constant for column a
+# ordering b, c is still satisfied. Final plan shouldn't have
Review Comment:
👍
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]