alamb commented on code in PR #9813:
URL: https://github.com/apache/arrow-datafusion/pull/9813#discussion_r1561549294


##########
datafusion/physical-expr/src/equivalence/mod.rs:
##########
@@ -35,14 +36,59 @@ pub use properties::{join_equivalence_properties, 
EquivalenceProperties};
 /// This function constructs a duplicate-free `LexOrderingReq` by filtering out
 /// duplicate entries that have same physical expression inside. For example,
 /// `vec![a Some(ASC), a Some(DESC)]` collapses to `vec![a Some(ASC)]`.
+///
+/// It will also filter out entries that are guaranteed to be ordered whenever the next entry is;
+/// for instance, `vec![floor(a) Some(ASC), a Some(ASC)]` will be collapsed to
+/// `vec![a Some(ASC)]`.
 pub fn collapse_lex_req(input: LexRequirement) -> LexRequirement {
     let mut output = Vec::<PhysicalSortRequirement>::new();
     for item in input {
         if !output.iter().any(|req| req.expr.eq(&item.expr)) {
             output.push(item);
         }
     }
-    output
+    collapse_monotonic_lex_req(output)
+}
+
+/// This function constructs a normalized [`LexRequirement`] by filtering out 
entries
+/// that are ordered if the next entry is.
+/// Used in `collapse_lex_req`
+fn collapse_monotonic_lex_req(input: LexRequirement) -> LexRequirement {
+    input

Review Comment:
   Nit: we could probably save a copy by using `into_iter()` here rather than 
`iter()` -- then we could skip the `cloned()` at the end too



##########
datafusion/sqllogictest/test_files/filter_without_sort_exec.slt:
##########
@@ -43,19 +43,111 @@ SortPreservingMergeExec: [date@0 ASC NULLS LAST,time@2 ASC 
NULLS LAST]
 ------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
 --------StreamingTableExec: partition_sizes=1, projection=[date, ticker, 
time], infinite_source=true, output_ordering=[date@0 ASC NULLS LAST, ticker@1 
ASC NULLS LAST, time@2 ASC NULLS LAST]
 
+# constant ticker, CAST(time AS DATE) = date, order by time
+query TT
+explain SELECT * FROM data
+WHERE ticker = 'A' AND CAST(time AS DATE) = date
+ORDER BY "time"
+----
+logical_plan
+Sort: data.time ASC NULLS LAST
+--Filter: data.ticker = Utf8("A") AND CAST(data.time AS Date32) = data.date
+----TableScan: data projection=[date, ticker, time]
+physical_plan
+SortPreservingMergeExec: [time@2 ASC NULLS LAST]
+--CoalesceBatchesExec: target_batch_size=8192
+----FilterExec: ticker@1 = A AND CAST(time@2 AS Date32) = date@0
+------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+--------StreamingTableExec: partition_sizes=1, projection=[date, ticker, 
time], infinite_source=true, output_ordering=[date@0 ASC NULLS LAST, ticker@1 
ASC NULLS LAST, time@2 ASC NULLS LAST]
+
+# same thing but order by date
+query TT
+explain SELECT * FROM data
+WHERE ticker = 'A' AND CAST(time AS DATE) = date
+ORDER BY "date"
+----
+logical_plan
+Sort: data.date ASC NULLS LAST
+--Filter: data.ticker = Utf8("A") AND CAST(data.time AS Date32) = data.date
+----TableScan: data projection=[date, ticker, time]
+physical_plan
+SortPreservingMergeExec: [date@0 ASC NULLS LAST]
+--CoalesceBatchesExec: target_batch_size=8192
+----FilterExec: ticker@1 = A AND CAST(time@2 AS Date32) = date@0
+------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+--------StreamingTableExec: partition_sizes=1, projection=[date, ticker, 
time], infinite_source=true, output_ordering=[date@0 ASC NULLS LAST, ticker@1 
ASC NULLS LAST, time@2 ASC NULLS LAST]
+
+# same thing but order by ticker
+query TT
+explain SELECT * FROM data
+WHERE ticker = 'A' AND CAST(time AS DATE) = date
+ORDER BY "ticker"
+----
+logical_plan
+Sort: data.ticker ASC NULLS LAST
+--Filter: data.ticker = Utf8("A") AND CAST(data.time AS Date32) = data.date
+----TableScan: data projection=[date, ticker, time]
+physical_plan
+CoalescePartitionsExec

Review Comment:
   It is cool to see that it knows it doesn't need to preserve the order at all 
(it doesn't have to use a sort-preserving merge).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to