kosiew commented on code in PR #16696:
URL: https://github.com/apache/datafusion/pull/16696#discussion_r2389737067


##########
datafusion/optimizer/tests/optimizer_integration.rs:
##########
@@ -478,6 +514,90 @@ fn 
select_correlated_predicate_subquery_with_uppercase_ident() {
     );
 }
 
+#[test]
+fn recursive_cte_projection_pushdown() -> Result<()> {
+    // Test that projection pushdown works with recursive CTEs by ensuring
+    // only the required columns are projected from the base table, even when
+    // the CTE definition includes unused columns
+    let sql = "WITH RECURSIVE nodes AS (\
+        SELECT col_int32 AS id, col_utf8 AS name, col_uint32 AS extra FROM 
test \
+        UNION ALL \
+        SELECT id + 1, name, extra FROM nodes WHERE id < 3\
+    ) SELECT id FROM nodes";
+    let plan = test_sql(sql)?;
+
+    // The optimizer successfully performs projection pushdown by only 
selecting the needed
+    // columns from the base table and recursive table, eliminating unused 
columns
+    assert_snapshot!(
+        format!("{plan}"),
+        @r#"SubqueryAlias: nodes
+  RecursiveQuery: is_distinct=false
+    Projection: test.col_int32 AS id
+      TableScan: test projection=[col_int32]
+    Projection: CAST(CAST(nodes.id AS Int64) + Int64(1) AS Int32)
+      Filter: nodes.id < Int32(3)
+        TableScan: nodes projection=[id]
+"#
+    );
+    Ok(())
+}
+
+#[test]
+fn recursive_cte_with_unused_columns() -> Result<()> {
+    // Test projection pushdown with a recursive CTE where the base case
+    // includes columns that are never used in the recursive part or final 
result
+    let sql = "WITH RECURSIVE series AS (\
+        SELECT 1 AS n, col_utf8, col_uint32, col_date32 FROM test WHERE 
col_int32 = 1 \
+        UNION ALL \
+        SELECT n + 1, col_utf8, col_uint32, col_date32 FROM series WHERE n < 3\
+    ) SELECT n FROM series";
+    let plan = test_sql(sql)?;
+
+    // The optimizer successfully performs projection pushdown by eliminating 
unused columns
+    // even when they're defined in the CTE but not actually needed
+    assert_snapshot!(
+        format!("{plan}"),
+        @r#"SubqueryAlias: series
+  RecursiveQuery: is_distinct=false
+    Projection: Int64(1) AS n
+      Filter: test.col_int32 = Int32(1)
+        TableScan: test projection=[col_int32]
+    Projection: series.n + Int64(1)
+      Filter: series.n < Int64(3)
+        TableScan: series projection=[n]
+"#
+    );
+    Ok(())
+}
+
+#[test]
+fn recursive_cte_true_projection_pushdown() -> Result<()> {
+    // Test case that truly demonstrates projection pushdown working:
+    // The base case only selects needed columns

Review Comment:
   The two preceding tests deliberately start with extra columns in either the
base term or the recursive term and check that the optimizer trims them back to
just the referenced field (`TableScan: test projection=[col_int32]` for the base
table, and `TableScan: nodes projection=[id]` / `TableScan: series
projection=[n]` for the recursive reference). The “true” case is meant to be the
baseline: it selects only the needed columns to begin with, so once everything
is pruned the recursive query should remain at that same minimal shape
(`TableScan: test projection=[col_int32]` and `TableScan: countdown
projection=[n]`). I’ll rename the test (e.g.
`recursive_cte_projection_pushdown_baseline`) and expand its comment so the
distinction between the pruning tests and the baseline is clearer.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to