vbarua commented on code in PR #13931:
URL: https://github.com/apache/datafusion/pull/13931#discussion_r1899707055
##########
datafusion/substrait/src/logical_plan/producer.rs:
##########
@@ -998,450 +1304,418 @@ pub fn make_binary_op_scalar_func(
/// Convert DataFusion Expr to Substrait Rex
///
/// # Arguments
-///
-/// * `expr` - DataFusion expression to be parse into a Substrait expression
-/// * `schema` - DataFusion input schema for looking up field qualifiers
-/// * `col_ref_offset` - Offset for calculating Substrait field reference
indices.
-/// This should only be set by caller with more than one
input relations i.e. Join.
-/// Substrait expects one set of indices when joining two
relations.
-/// Let's say `left` and `right` have `m` and `n` columns,
respectively. The `right`
-/// relation will have column indices from `0` to `n-1`,
however, Substrait will expect
-/// the `right` indices to be offset by the `left`. This
means Substrait will expect to
-/// evaluate the join condition expression on indices [0
.. n-1, n .. n+m-1]. For example:
-/// ```SELECT *
-/// FROM t1
-/// JOIN t2
-/// ON t1.c1 = t2.c0;```
-/// where t1 consists of columns [c0, c1, c2], and t2 =
columns [c0, c1]
-/// the join condition should become
-/// `col_ref(1) = col_ref(3 + 0)`
-/// , where `3` is the number of `left` columns
(`col_ref_offset`) and `0` is the index
-/// of the join key column from `right`
-/// * `extensions` - Substrait extension info. Contains registered function
information
-#[allow(deprecated)]
+/// * `producer` - SubstraitProducer implementation which the handles the
actual conversion
+/// * `expr` - DataFusion expression to convert into a Substrait expression
+/// * `schema` - DataFusion input schema for looking up columns
pub fn to_substrait_rex(
- state: &dyn SubstraitPlanningState,
+ producer: &mut impl SubstraitProducer,
expr: &Expr,
schema: &DFSchemaRef,
- col_ref_offset: usize,
- extensions: &mut Extensions,
) -> Result<Expression> {
match expr {
- Expr::InList(InList {
- expr,
- list,
- negated,
- }) => {
- let substrait_list = list
- .iter()
- .map(|x| to_substrait_rex(state, x, schema, col_ref_offset,
extensions))
- .collect::<Result<Vec<Expression>>>()?;
- let substrait_expr =
- to_substrait_rex(state, expr, schema, col_ref_offset,
extensions)?;
-
- let substrait_or_list = Expression {
- rex_type: Some(RexType::SingularOrList(Box::new(SingularOrList
{
- value: Some(Box::new(substrait_expr)),
- options: substrait_list,
- }))),
- };
-
- if *negated {
- let function_anchor =
extensions.register_function("not".to_string());
-
- Ok(Expression {
- rex_type: Some(RexType::ScalarFunction(ScalarFunction {
- function_reference: function_anchor,
- arguments: vec![FunctionArgument {
- arg_type: Some(ArgType::Value(substrait_or_list)),
- }],
- output_type: None,
- args: vec![],
- options: vec![],
- })),
- })
- } else {
- Ok(substrait_or_list)
- }
+ Expr::Alias(expr) => producer.consume_alias(expr, schema),
+ Expr::Column(expr) => producer.consume_column(expr, schema),
+ Expr::Literal(expr) => producer.consume_literal(expr),
+ Expr::BinaryExpr(expr) => producer.consume_binary_expr(expr, schema),
+ Expr::Like(expr) => producer.consume_like(expr, schema),
+ Expr::SimilarTo(_) => not_impl_err!("SimilarTo is not supported"),
+ Expr::Not(_) => producer.consume_unary_expr(expr, schema),
+ Expr::IsNotNull(_) => producer.consume_unary_expr(expr, schema),
+ Expr::IsNull(_) => producer.consume_unary_expr(expr, schema),
+ Expr::IsTrue(_) => producer.consume_unary_expr(expr, schema),
+ Expr::IsFalse(_) => producer.consume_unary_expr(expr, schema),
+ Expr::IsUnknown(_) => producer.consume_unary_expr(expr, schema),
+ Expr::IsNotTrue(_) => producer.consume_unary_expr(expr, schema),
+ Expr::IsNotFalse(_) => producer.consume_unary_expr(expr, schema),
+ Expr::IsNotUnknown(_) => producer.consume_unary_expr(expr, schema),
+ Expr::Negative(_) => producer.consume_unary_expr(expr, schema),
+ Expr::Between(expr) => producer.consume_between(expr, schema),
+ Expr::Case(expr) => producer.consume_case(expr, schema),
+ Expr::Cast(expr) => producer.consume_cast(expr, schema),
+ Expr::TryCast(expr) => producer.consume_try_cast(expr, schema),
+ Expr::ScalarFunction(expr) => producer.consume_scalar_function(expr,
schema),
+ Expr::AggregateFunction(_) => {
+ internal_err!(
+ "AggregateFunction should only be encountered as part of a
LogicalPlan::Aggregate"
+ )
}
- Expr::ScalarFunction(fun) => {
- let mut arguments: Vec<FunctionArgument> = vec![];
- for arg in &fun.args {
- arguments.push(FunctionArgument {
- arg_type: Some(ArgType::Value(to_substrait_rex(
- state,
- arg,
- schema,
- col_ref_offset,
- extensions,
- )?)),
- });
- }
+ Expr::WindowFunction(expr) => producer.consume_window_function(expr,
schema),
+ Expr::InList(expr) => producer.consume_in_list(expr, schema),
+ Expr::InSubquery(expr) => producer.consume_in_subquery(expr, schema),
+ _ => not_impl_err!("Cannot convert {expr:?} to Substrait"),
Review Comment:
Easy enough to expand out. Updated.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]