kadinrabo commented on code in PR #19739:
URL: https://github.com/apache/datafusion/pull/19739#discussion_r2709439239


##########
datafusion/substrait/src/logical_plan/producer/expr/mod.rs:
##########
@@ -139,16 +139,16 @@ pub fn to_substrait_rex(
         }
         Expr::WindowFunction(expr) => producer.handle_window_function(expr, 
schema),
         Expr::InList(expr) => producer.handle_in_list(expr, schema),
-        Expr::Exists(expr) => not_impl_err!("Cannot convert {expr:?} to 
Substrait"),
+        Expr::Exists(expr) => producer.handle_exists(expr, schema),
         Expr::InSubquery(expr) => producer.handle_in_subquery(expr, schema),
-        Expr::ScalarSubquery(expr) => {
-            not_impl_err!("Cannot convert {expr:?} to Substrait")
-        }
+        Expr::ScalarSubquery(expr) => producer.handle_scalar_subquery(expr, 
schema),
         #[expect(deprecated)]
         Expr::Wildcard { .. } => not_impl_err!("Cannot convert {expr:?} to 
Substrait"),
         Expr::GroupingSet(expr) => not_impl_err!("Cannot convert {expr:?} to 
Substrait"),
         Expr::Placeholder(expr) => not_impl_err!("Cannot convert {expr:?} to 
Substrait"),
         Expr::OuterReferenceColumn(_, _) => {
+            // OuterReferenceColumn requires tracking outer query schema 
context for correlated

Review Comment:
   I'm still getting a lot of `This feature is not implemented: Cannot convert 
OuterReferenceColumn` errors when running the tests, so maybe this PR can 
partially close the issue instead of completely? Unless you're still working on 
it.



##########
datafusion/substrait/src/logical_plan/producer/expr/subquery.rs:
##########
@@ -70,3 +71,70 @@ pub fn from_in_subquery(
         Ok(substrait_subquery)
     }
 }
+
+/// Convert DataFusion ScalarSubquery to Substrait Scalar subquery type
+pub fn from_scalar_subquery(
+    producer: &mut impl SubstraitProducer,
+    subquery: &Subquery,
+    _schema: &DFSchemaRef,
+) -> datafusion::common::Result<Expression> {
+    let subquery_plan = producer.handle_plan(subquery.subquery.as_ref())?;
+
+    Ok(Expression {
+        rex_type: Some(RexType::Subquery(Box::new(
+            substrait::proto::expression::Subquery {
+                subquery_type: Some(
+                    
substrait::proto::expression::subquery::SubqueryType::Scalar(
+                        Box::new(Scalar {
+                            input: Some(subquery_plan),
+                        }),
+                    ),
+                ),
+            },
+        ))),
+    })
+}
+
+/// Convert DataFusion Exists expression to Substrait SetPredicate subquery 
type
+pub fn from_exists(
+    producer: &mut impl SubstraitProducer,
+    exists: &Exists,
+    _schema: &DFSchemaRef,
+) -> datafusion::common::Result<Expression> {
+    let subquery_plan = 
producer.handle_plan(exists.subquery.subquery.as_ref())?;
+
+    let substrait_exists = Expression {
+        rex_type: Some(RexType::Subquery(Box::new(
+            substrait::proto::expression::Subquery {
+                subquery_type: Some(
+                    
substrait::proto::expression::subquery::SubqueryType::SetPredicate(
+                        Box::new(SetPredicate {
+                            predicate_op: 
substrait::proto::expression::subquery::set_predicate::PredicateOp::Exists as 
i32,
+                            tuples: Some(subquery_plan),
+                        }),
+                    ),
+                ),
+            },
+        ))),
+    };
+
+    // Handle negated EXISTS (NOT EXISTS)
+    if exists.negated {

Review Comment:
   There's no `PREDICATE_OP_NOT_EXISTS` in the spec, so I think this is a 
reasonable workaround. Minor note: the consumer hardcodes `negated:false`, so I 
don't think `NOT EXISTS`/`NOT IN` will round-trip correctly 
([Exists](https://github.com/apache/datafusion/blob/dededf7c78d587afc662c8a06fb949a0bf4d457f/datafusion/substrait/src/logical_plan/consumer/expr/subquery.rs#L92)/[InSubquery](https://github.com/apache/datafusion/blob/dededf7c78d587afc662c8a06fb949a0bf4d457f/datafusion/substrait/src/logical_plan/consumer/expr/subquery.rs#L57))



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to