This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch string-view
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/string-view by this push:
     new 9e6cd31a1f Improve filter predicates with `Utf8View` literals (#11043)
9e6cd31a1f is described below

commit 9e6cd31a1fffe58ec71bcbcd645b6d672b6b7309
Author: Alex Huang <huangweijun1...@gmail.com>
AuthorDate: Thu Jun 27 01:31:42 2024 +0800

    Improve filter predicates with `Utf8View` literals (#11043)
    
    * refactor: Improve type coercion logic in TypeCoercionRewriter
    
    * refactor: Improve type coercion logic in TypeCoercionRewriter
    
    * chore
    
    * chore: Update test
    
    * refactor: Improve type coercion logic in TypeCoercionRewriter
    
    * refactor: Remove unused import and update code formatting in 
unwrap_cast_in_comparison.rs
---
 .../optimizer/src/unwrap_cast_in_comparison.rs     | 26 +++++++++-------------
 datafusion/sqllogictest/test_files/string_view.slt | 18 +++++++++++++--
 2 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs 
b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs
index 07a946c1ad..a43c64a813 100644
--- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs
+++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs
@@ -33,7 +33,7 @@ use datafusion_common::tree_node::{Transformed, TreeNode, 
TreeNodeRewriter};
 use datafusion_common::{internal_err, DFSchema, DFSchemaRef, Result, 
ScalarValue};
 use datafusion_expr::expr::{BinaryExpr, Cast, InList, TryCast};
 use datafusion_expr::utils::merge_schema;
-use datafusion_expr::{lit, Expr, ExprSchemable, LogicalPlan, Operator};
+use datafusion_expr::{lit, Expr, ExprSchemable, LogicalPlan};
 
 /// [`UnwrapCastInComparison`] attempts to remove casts from
 /// comparisons to literals ([`ScalarValue`]s) by applying the casts
@@ -154,7 +154,7 @@ impl TreeNodeRewriter for UnwrapCastExprRewriter {
                     };
                     is_supported_type(&left_type)
                         && is_supported_type(&right_type)
-                        && is_comparison_op(op)
+                        && op.is_comparison_operator()
                 } =>
             {
                 match (left.as_mut(), right.as_mut()) {
@@ -270,18 +270,6 @@ impl TreeNodeRewriter for UnwrapCastExprRewriter {
     }
 }
 
-fn is_comparison_op(op: &Operator) -> bool {
-    matches!(
-        op,
-        Operator::Eq
-            | Operator::NotEq
-            | Operator::Gt
-            | Operator::GtEq
-            | Operator::Lt
-            | Operator::LtEq
-    )
-}
-
 /// Returns true if [UnwrapCastExprRewriter] supports this data type
 fn is_supported_type(data_type: &DataType) -> bool {
     is_supported_numeric_type(data_type)
@@ -308,7 +296,10 @@ fn is_supported_numeric_type(data_type: &DataType) -> bool 
{
 
 /// Returns true if [UnwrapCastExprRewriter] supports casting this value as a 
string
 fn is_supported_string_type(data_type: &DataType) -> bool {
-    matches!(data_type, DataType::Utf8 | DataType::LargeUtf8)
+    matches!(
+        data_type,
+        DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View
+    )
 }
 
 /// Returns true if [UnwrapCastExprRewriter] supports casting this value as a 
dictionary
@@ -481,12 +472,15 @@ fn try_cast_string_literal(
     target_type: &DataType,
 ) -> Option<ScalarValue> {
     let string_value = match lit_value {
-        ScalarValue::Utf8(s) | ScalarValue::LargeUtf8(s) => s.clone(),
+        ScalarValue::Utf8(s) | ScalarValue::LargeUtf8(s) | 
ScalarValue::Utf8View(s) => {
+            s.clone()
+        }
         _ => return None,
     };
     let scalar_value = match target_type {
         DataType::Utf8 => ScalarValue::Utf8(string_value),
         DataType::LargeUtf8 => ScalarValue::LargeUtf8(string_value),
+        DataType::Utf8View => ScalarValue::Utf8View(string_value),
         _ => return None,
     };
     Some(scalar_value)
diff --git a/datafusion/sqllogictest/test_files/string_view.slt 
b/datafusion/sqllogictest/test_files/string_view.slt
index 7c9fbf4735..3ba4e271c2 100644
--- a/datafusion/sqllogictest/test_files/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string_view.slt
@@ -273,12 +273,18 @@ logical_plan
 02)--Filter: test.column1_utf8view = Utf8View("Andrew")
 03)----TableScan: test projection=[column1_utf8, column1_utf8view]
 
-# should not be casting the column: 
https://github.com/apache/datafusion/issues/10998
 query TT
 explain SELECT column1_utf8 from test where column1_utf8 = 
arrow_cast('Andrew', 'Utf8View');
 ----
 logical_plan
-01)Filter: CAST(test.column1_utf8 AS Utf8View) = Utf8View("Andrew")
+01)Filter: test.column1_utf8 = Utf8("Andrew")
+02)--TableScan: test projection=[column1_utf8]
+
+query TT
+explain SELECT column1_utf8 from test where arrow_cast('Andrew', 'Utf8View') = 
column1_utf8;
+----
+logical_plan
+01)Filter: test.column1_utf8 = Utf8("Andrew")
 02)--TableScan: test projection=[column1_utf8]
 
 query TT
@@ -289,6 +295,14 @@ logical_plan
 02)--Filter: test.column1_utf8view = Utf8View("Andrew")
 03)----TableScan: test projection=[column1_utf8, column1_utf8view]
 
+query TT
+explain SELECT column1_utf8 from test where arrow_cast('Andrew', 
'Dictionary(Int32, Utf8)') = column1_utf8view;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
 # compare string / stringview
 # Should cast string -> stringview (which is cheap), not stringview -> string 
(which is not)
 query TT


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to