alamb commented on code in PR #17419:
URL: https://github.com/apache/datafusion/pull/17419#discussion_r2337757876
##########
datafusion/sqllogictest/test_files/window.slt:
##########
@@ -3532,7 +3532,7 @@ physical_plan
01)BoundedWindowAggExec: wdw=[sum(multiple_ordered_table.a) ORDER BY
[multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND
CURRENT ROW: Field { name: "sum(multiple_ordered_table.a) ORDER BY
[multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND
CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered:
false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT
ROW], mode=[Sorted]
02)--CoalesceBatchesExec: target_batch_size=4096
03)----FilterExec: b@2 = 0
-04)------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_orderings=[[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST], [c@3 ASC
NULLS LAST]], file_type=csv, has_header=true
+04)------DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_orderings=[[c@3 ASC NULLS LAST], [a@1 ASC NULLS LAST, b@2 ASC
NULLS LAST]], file_type=csv, has_header=true
Review Comment:
Do you know why the output orderings now come out in a different (reversed)
order?
##########
datafusion/sqllogictest/test_files/order.slt:
##########
@@ -1517,3 +1517,40 @@ SELECT address, zip FROM addresses ORDER BY ALL;
111 Duck Duck Goose Ln 11111
111 Duck Duck Goose Ln 11111-0001
123 Quack Blvd 11111
+
+# Create a table with an order clause that's not a simple column reference
+statement ok
+CREATE EXTERNAL TABLE ordered (
+ a BIGINT NOT NULL,
+ b BIGINT NOT NULL
+)
+STORED AS CSV
+LOCATION 'data/composite_order.csv'
+OPTIONS ('format.has_header' 'true')
+WITH ORDER (a + b);
+
+# Simple query should be just a table scan
+query TT
+EXPLAIN SELECT * from ordered;
+----
+physical_plan DataSourceExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/sqllogictest/data/composite_order.csv]]},
projection=[a, b], output_ordering=[a@0 + b@1 ASC NULLS LAST], file_type=csv,
has_header=true
+
+# Query ordered by the declared order should be just a table scan
Review Comment:
nice
##########
datafusion/catalog/src/stream.rs:
##########
@@ -321,24 +324,26 @@ impl TableProvider for StreamTable {
async fn scan(
&self,
- _state: &dyn Session,
+ state: &dyn Session,
projection: Option<&Vec<usize>>,
_filters: &[Expr],
limit: Option<usize>,
) -> Result<Arc<dyn ExecutionPlan>> {
- let projected_schema = match projection {
- Some(p) => {
- let projected = self.0.source.schema().project(p)?;
- create_ordering(&projected, &self.0.order)?
- }
- None => create_ordering(self.0.source.schema(), &self.0.order)?,
+ let schema = self.0.source.schema();
+ let df_schema = DFSchema::try_from(Arc::clone(schema))?;
Review Comment:
I wonder if (re)creating this DFSchema is necessary -- it feels like at this
point we already know the schema information.
However, I also see that we need a DFSchema to correctly create arbitrary
PhysicalExprs, so this is probably fine.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]