findepi commented on code in PR #13061:
URL: https://github.com/apache/datafusion/pull/13061#discussion_r1823964108


##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1446,18 +1446,54 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for 
Simplifier<'a, S> {
 
             // Rules for Like
             Expr::Like(Like {
-                expr,
-                pattern,
+                expr: ref like_expr,

Review Comment:
   keep `expr` as `expr` 
   use `like_expr` when referring to the whole `Expr::Like` thing



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1446,18 +1446,54 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for 
Simplifier<'a, S> {
 
             // Rules for Like
             Expr::Like(Like {
-                expr,
-                pattern,
+                expr: ref like_expr,
+                ref pattern, // Changed to ref pattern to avoid moving
                 negated,
-                escape_char: _,
-                case_insensitive: _,
-            }) if !is_null(&expr)
-                && matches!(
-                    pattern.as_ref(),
-                    Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) if 
pattern_str == "%"
-                ) =>
-            {
-                Transformed::yes(lit(!negated))
+                escape_char,
+                case_insensitive,
+            }) => {
+                if let Expr::Literal(ScalarValue::Utf8(Some(pattern))) = 
pattern.as_ref()
+                {
+                    // Special case: pattern is just "%"
+                    if pattern == "%" && !is_null(like_expr) {
+                        return Ok(Transformed::yes(lit(!negated)));
+                    }
+
+                    let pct_wildcard_index = pattern.find('%');
+                    let underscore_index = pattern.find('_');
+
+                    // If no wildcards, treat as equality check
+                    if underscore_index.is_none() && 
pct_wildcard_index.is_none() {
+                        return Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr 
{
+                            left: like_expr.clone(),
+                            op: if negated {
+                                Operator::NotEq
+                            } else {
+                                Operator::Eq
+                            },
+                            right: 
Box::new(Expr::Literal(ScalarValue::Utf8(Some(
+                                pattern.to_string(),
+                            )))),
+                        })));
+                    }
+
+                    // Handle single % at end case (prefix search)
+                    if let Some(index) = pct_wildcard_index {
+                        if index == pattern.len() - 1 && !case_insensitive && 
escape_char.is_none() {
+                            let prefix = pattern[..index].to_string();
+                            let new_expr = Expr::ScalarFunction(ScalarFunction 
{
+                                func: 
datafusion_functions::string::starts_with(),

Review Comment:
   
   Today a downstream project can choose which functions to use, this is 
configurable in the session, right? The DF repertoire of functions serves as a 
convenient package, but not mandatory.
   Here we're inserting implicit function call to a specific function.
   Is this following a pre-existing pattern? if so, i am OK with this.
   But maybe it would be better to eg consult session state whether this 
function was supposed to be used. What do you think?



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1446,18 +1446,54 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for 
Simplifier<'a, S> {
 
             // Rules for Like
             Expr::Like(Like {
-                expr,
-                pattern,
+                expr: ref like_expr,
+                ref pattern, // Changed to ref pattern to avoid moving
                 negated,
-                escape_char: _,
-                case_insensitive: _,
-            }) if !is_null(&expr)
-                && matches!(
-                    pattern.as_ref(),
-                    Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) if 
pattern_str == "%"
-                ) =>
-            {
-                Transformed::yes(lit(!negated))
+                escape_char,
+                case_insensitive,
+            }) => {
+                if let Expr::Literal(ScalarValue::Utf8(Some(pattern))) = 
pattern.as_ref()
+                {
+                    // Special case: pattern is just "%"
+                    if pattern == "%" && !is_null(like_expr) {
+                        return Ok(Transformed::yes(lit(!negated)));

Review Comment:
   The easiest solution would be to just drop this special case.
   I.e. `expr LIKE '%'` would get simplified to `starts_with(expr, '')`



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1446,18 +1446,54 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for 
Simplifier<'a, S> {
 
             // Rules for Like
             Expr::Like(Like {
-                expr,
-                pattern,
+                expr: ref like_expr,
+                ref pattern, // Changed to ref pattern to avoid moving
                 negated,
-                escape_char: _,
-                case_insensitive: _,
-            }) if !is_null(&expr)
-                && matches!(
-                    pattern.as_ref(),
-                    Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) if 
pattern_str == "%"
-                ) =>
-            {
-                Transformed::yes(lit(!negated))
+                escape_char,
+                case_insensitive,
+            }) => {
+                if let Expr::Literal(ScalarValue::Utf8(Some(pattern))) = 
pattern.as_ref()
+                {
+                    // Special case: pattern is just "%"
+                    if pattern == "%" && !is_null(like_expr) {
+                        return Ok(Transformed::yes(lit(!negated)));
+                    }
+
+                    let pct_wildcard_index = pattern.find('%');
+                    let underscore_index = pattern.find('_');

Review Comment:
   Honor `escape_char`



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -3589,6 +3635,18 @@ mod tests {
         let expr = not_ilike(col("c1"), "%");
         assert_eq!(simplify(expr), lit(false));
 
+        let expr = like(col("c1"), "a%");

Review Comment:
   add test cases
   
   - with `%` pattern 
   - with `_` pattern (can be simplified to a length check, but that can be 
follow up)
   
   - with `%%` and `%%%` patterns (can be simplified just like single `%`)
   - with `__` and `___` patterns (can be simplified to a length check, but 
that can be follow up)
   - with patterns combining % and _, eg `a_b%`, `a%b_` (these cannot 
simplified) and  `_%`,  `%_`  (these can be simplified to a length check, but 
that can be follow up)



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1446,18 +1446,54 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for 
Simplifier<'a, S> {
 
             // Rules for Like
             Expr::Like(Like {
-                expr,
-                pattern,
+                expr: ref like_expr,
+                ref pattern, // Changed to ref pattern to avoid moving
                 negated,
-                escape_char: _,
-                case_insensitive: _,
-            }) if !is_null(&expr)
-                && matches!(
-                    pattern.as_ref(),
-                    Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) if 
pattern_str == "%"
-                ) =>
-            {
-                Transformed::yes(lit(!negated))
+                escape_char,
+                case_insensitive,
+            }) => {
+                if let Expr::Literal(ScalarValue::Utf8(Some(pattern))) = 
pattern.as_ref()
+                {
+                    // Special case: pattern is just "%"
+                    if pattern == "%" && !is_null(like_expr) {
+                        return Ok(Transformed::yes(lit(!negated)));

Review Comment:
   This is cool idea, but `is_null(like_expr)` checks makes it incorrect.
   This function returns "is this always null". Applied to a column reference, 
it will return false, so this optimization will kick in, but the column values 
are still _nullable_.
   
   The "more correct" way would be to
   
   - optimize `WHERE col LIKE '%'` to `WHERE col IS NOT NULL`
   - optimize `WHERE col NOT LIKE '%'` to `WHERE false`.
   
   However, this simplification cannot be done **here**. It can only be done 
for top-level filter conjuncts, or when expr being simplified is immediately 
used as a condition (eg `IF(expr, ...)`). In other cases, this need to be 
NULL-preserving boolean logic:
   
   ```
   trino> SELECT a, a LIKE '%', a NOT LIKE '%'
       -> FROM (VALUES NULL, '', 'x') t(a);
     a   | _col1 | _col2
   ------+-------+-------
    NULL | NULL  | NULL
         | true  | false
    x    | true  | false
   (3 rows)
   ```
   
   
   
   
   
   
   



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1446,18 +1446,54 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for 
Simplifier<'a, S> {
 
             // Rules for Like
             Expr::Like(Like {
-                expr,
-                pattern,
+                expr: ref like_expr,
+                ref pattern, // Changed to ref pattern to avoid moving
                 negated,
-                escape_char: _,
-                case_insensitive: _,
-            }) if !is_null(&expr)
-                && matches!(
-                    pattern.as_ref(),
-                    Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) if 
pattern_str == "%"
-                ) =>
-            {
-                Transformed::yes(lit(!negated))
+                escape_char,
+                case_insensitive,
+            }) => {
+                if let Expr::Literal(ScalarValue::Utf8(Some(pattern))) = 
pattern.as_ref()
+                {
+                    // Special case: pattern is just "%"
+                    if pattern == "%" && !is_null(like_expr) {
+                        return Ok(Transformed::yes(lit(!negated)));
+                    }
+
+                    let pct_wildcard_index = pattern.find('%');
+                    let underscore_index = pattern.find('_');
+
+                    // If no wildcards, treat as equality check
+                    if underscore_index.is_none() && 
pct_wildcard_index.is_none() {
+                        return Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr 
{
+                            left: like_expr.clone(),
+                            op: if negated {
+                                Operator::NotEq
+                            } else {
+                                Operator::Eq
+                            },
+                            right: 
Box::new(Expr::Literal(ScalarValue::Utf8(Some(
+                                pattern.to_string(),
+                            )))),
+                        })));
+                    }
+
+                    // Handle single % at end case (prefix search)
+                    if let Some(index) = pct_wildcard_index {
+                        if index == pattern.len() - 1 && !case_insensitive && 
escape_char.is_none() {

Review Comment:
   `&& underscore_index.is_none()`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to