alamb commented on code in PR #13260:
URL: https://github.com/apache/datafusion/pull/13260#discussion_r1830873028


##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -2987,41 +3051,41 @@ mod tests {
         })
     }
 
-    fn like(expr: Expr, pattern: &str) -> Expr {
+    fn like(expr: Expr, pattern: impl Into<Expr>) -> Expr {

Review Comment:
   I think we could use Expr::like 
https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.Expr.html#method.like
 and similar here instead of these functions
   
   This is likely left over from when the Expr API was less expressive



##########
datafusion/sqllogictest/test_files/string/string_view.slt:
##########
@@ -396,8 +396,9 @@ EXPLAIN SELECT
 FROM test;
 ----
 logical_plan
-01)Projection: test.column1_utf8view LIKE Utf8View("foo") AS like, 
test.column1_utf8view ILIKE Utf8View("foo") AS ilike
-02)--TableScan: test projection=[column1_utf8view]
+01)Projection: __common_expr_1 AS like, __common_expr_1 AS ilike

Review Comment:
   Nice!
   
   Though I think this test's meaning is now changed it is supposed to be 
verifying cast  exprs for like
   
   Perhaps you can change the patterns to `like '%foo%'`



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1470,34 +1471,67 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for 
Simplifier<'a, S> {
             }) => Transformed::yes(simplify_regex_expr(left, op, right)?),
 
             // Rules for Like
-            Expr::Like(Like {
-                expr,
-                pattern,
-                negated,
-                escape_char: _,
-                case_insensitive: _,
-            }) if matches!(
-                pattern.as_ref(),
-                Expr::Literal(ScalarValue::Utf8(Some(pattern_str))) if 
pattern_str == "%"
-            ) || matches!(
-                pattern.as_ref(),
-                Expr::Literal(ScalarValue::LargeUtf8(Some(pattern_str))) if 
pattern_str == "%"
-            ) || matches!(
-                pattern.as_ref(),
-                Expr::Literal(ScalarValue::Utf8View(Some(pattern_str))) if 
pattern_str == "%"
-            ) =>
-            {
-                // exp LIKE '%' is
-                //   - when exp is not NULL, it's true
-                //   - when exp is NULL, it's NULL
-                // exp NOT LIKE '%' is
-                //   - when exp is not NULL, it's false
-                //   - when exp is NULL, it's NULL
-                Transformed::yes(Expr::Case(Case {
-                    expr: Some(Box::new(Expr::IsNotNull(expr))),
-                    when_then_expr: vec![(Box::new(lit(true)), 
Box::new(lit(!negated)))],
-                    else_expr: None,
-                }))
+            Expr::Like(like) => {
+                match as_string_scalar(&like.pattern) {
+                    Some((data_type, pattern_str)) => {
+                        match pattern_str {
+                            None => return 
Ok(Transformed::yes(lit_bool_null())),
+                            Some(pattern_str) if pattern_str == "%" => {
+                                // exp LIKE '%' is
+                                //   - when exp is not NULL, it's true
+                                //   - when exp is NULL, it's NULL
+                                // exp NOT LIKE '%' is
+                                //   - when exp is not NULL, it's false
+                                //   - when exp is NULL, it's NULL
+                                let result_for_non_null = lit(!like.negated);
+                                Transformed::yes(if 
!info.nullable(&like.expr)? {
+                                    result_for_non_null
+                                } else {
+                                    Expr::Case(Case {
+                                        expr: 
Some(Box::new(Expr::IsNotNull(like.expr))),
+                                        when_then_expr: vec![(
+                                            Box::new(lit(true)),
+                                            Box::new(result_for_non_null),
+                                        )],
+                                        else_expr: None,
+                                    })
+                                })
+                            }
+                            Some(pattern_str)

Review Comment:
   I am not sure about this rule (see questions below) -- maybe you could add 
some comments explanation on what it is supposed to do. 
   



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -3633,32 +3697,123 @@ mod tests {
 
     #[test]
     fn test_like_and_ilke() {
-        // LIKE '%'
-        let expr = like(col("c1"), "%");
+        let null = lit(ScalarValue::Utf8(None));
+
+        // expr [NOT] [I]LIKE NULL
+        let expr = like(col("c1"), null.clone());
+        assert_eq!(simplify(expr), lit_bool_null());
+
+        let expr = not_like(col("c1"), null.clone());
+        assert_eq!(simplify(expr), lit_bool_null());
+
+        let expr = ilike(col("c1"), null.clone());
+        assert_eq!(simplify(expr), lit_bool_null());
+
+        let expr = not_ilike(col("c1"), null.clone());
+        assert_eq!(simplify(expr), lit_bool_null());
+
+        // expr [NOT] [I]LIKE '%'
+        let expr = like(col("c1"), lit("%"));
+        assert_eq!(simplify(expr), if_not_null(col("c1"), true));
+
+        let expr = not_like(col("c1"), lit("%"));
+        assert_eq!(simplify(expr), if_not_null(col("c1"), false));
+
+        let expr = ilike(col("c1"), lit("%"));
+        assert_eq!(simplify(expr), if_not_null(col("c1"), true));
+
+        let expr = not_ilike(col("c1"), lit("%"));
+        assert_eq!(simplify(expr), if_not_null(col("c1"), false));
+
+        // expr [NOT] [I]LIKE '%%'

Review Comment:
   Also, since the code seems to handle more than two `%`, could you add a test 
for patterns like
   
   - `col LIKE '17%%'      // 17%`
   - `col LIKE '%%five%%    // %five%`
   - `col LIKE '%%%%five%%%%    // %%five%%`



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -3633,32 +3697,123 @@ mod tests {
 
     #[test]
     fn test_like_and_ilke() {
-        // LIKE '%'
-        let expr = like(col("c1"), "%");
+        let null = lit(ScalarValue::Utf8(None));
+
+        // expr [NOT] [I]LIKE NULL
+        let expr = like(col("c1"), null.clone());
+        assert_eq!(simplify(expr), lit_bool_null());
+
+        let expr = not_like(col("c1"), null.clone());
+        assert_eq!(simplify(expr), lit_bool_null());
+
+        let expr = ilike(col("c1"), null.clone());
+        assert_eq!(simplify(expr), lit_bool_null());
+
+        let expr = not_ilike(col("c1"), null.clone());
+        assert_eq!(simplify(expr), lit_bool_null());
+
+        // expr [NOT] [I]LIKE '%'
+        let expr = like(col("c1"), lit("%"));
+        assert_eq!(simplify(expr), if_not_null(col("c1"), true));
+
+        let expr = not_like(col("c1"), lit("%"));
+        assert_eq!(simplify(expr), if_not_null(col("c1"), false));
+
+        let expr = ilike(col("c1"), lit("%"));
+        assert_eq!(simplify(expr), if_not_null(col("c1"), true));
+
+        let expr = not_ilike(col("c1"), lit("%"));
+        assert_eq!(simplify(expr), if_not_null(col("c1"), false));
+
+        // expr [NOT] [I]LIKE '%%'

Review Comment:
   I don't understand this simplification 
   
   I thought `expr LIKE '%%'` means  the same as `expr = '%'`  (match the 
actual literal character `%` )
   
   Specifically that `%%` is how to escape the wildcard to not be a wildcard
   
   This test looks like it has applied the rewrite needed for `expr LIKE '%'` 
which matches any non null input
   
   🤔  



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to