This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new ccb4baf0fc Initial support for `StringView`, merge changes from 
`string-view` development branch (#11402)
ccb4baf0fc is described below

commit ccb4baf0fc6b4dee983bb29f2282b9c19510a481
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Jul 16 15:52:35 2024 -0400

    Initial support for `StringView`, merge changes from `string-view` 
development branch (#11402)
    
    * Update `string-view` branch to arrow-rs main (#10966)
    
    * Pin to arrow main
    
    * Fix clippy with latest arrow
    
    * Uncomment test that needs new arrow-rs to work
    
    * Update datafusion-cli Cargo.lock
    
    * Update Cargo.lock
    
    * tapelo
    
    * feat: Implement equality = and inequality <> support for StringView 
(#10985)
    
    * feat: Implement equality = and inequality <> support for StringView
    
    * chore: Add tests for the StringView
    
    * chore
    
    * chore: Update tests for NULL
    
    * fix: Used build_array_string!
    
    * chore: Update string_coercion function to handle Utf8View type in 
binary.rs
    
    * chore: add tests
    
    * chore: ci
    
    * Add more StringView comparison test coverage (#10997)
    
    * Add more StringView comparison test coverage
    
    * add reference
    
    * Add another test showing casting on columns works correctly
    
    * feat: Implement equality = and inequality <> support for BinaryView 
(#11004)
    
    * feat: Implement equality = and inequality <> support for BinaryView
    
    Signed-off-by: Chojan Shang <[email protected]>
    
    * chore: make fmt happy
    
    Signed-off-by: Chojan Shang <[email protected]>
    
    ---------
    
    Signed-off-by: Chojan Shang <[email protected]>
    
    * Implement support for LargeString and LargeBinary for StringView and 
BinaryView (#11034)
    
    * implement large binary
    
    * add tests for large string
    
    * better comments for string coercion
    
    * Improve filter predicates with `Utf8View` literals (#11043)
    
    * refactor: Improve type coercion logic in TypeCoercionRewriter
    
    * refactor: Improve type coercion logic in TypeCoercionRewriter
    
    * chore
    
    * chore: Update test
    
    * refactor: Improve type coercion logic in TypeCoercionRewriter
    
    * refactor: Remove unused import and update code formatting in 
unwrap_cast_in_comparison.rs
    
    * Remove arrow-patch
    
    ---------
    
    Signed-off-by: Chojan Shang <[email protected]>
    Co-authored-by: Alex Huang <[email protected]>
    Co-authored-by: Chojan Shang <[email protected]>
    Co-authored-by: Xiangpeng Hao <[email protected]>
---
 datafusion/common/src/scalar/mod.rs                |   8 +-
 datafusion/expr/src/type_coercion/binary.rs        |  36 ++-
 .../optimizer/src/unwrap_cast_in_comparison.rs     |  26 +-
 datafusion/sqllogictest/test_files/binary_view.slt | 202 +++++++++++++
 datafusion/sqllogictest/test_files/string_view.slt | 326 +++++++++++++++++++++
 5 files changed, 566 insertions(+), 32 deletions(-)

diff --git a/datafusion/common/src/scalar/mod.rs 
b/datafusion/common/src/scalar/mod.rs
index c891e85aa5..38f70e4c14 100644
--- a/datafusion/common/src/scalar/mod.rs
+++ b/datafusion/common/src/scalar/mod.rs
@@ -1682,8 +1682,10 @@ impl ScalarValue {
             DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
             DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
             DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
+            DataType::Utf8View => build_array_string!(StringViewArray, 
Utf8View),
             DataType::Utf8 => build_array_string!(StringArray, Utf8),
             DataType::LargeUtf8 => build_array_string!(LargeStringArray, 
LargeUtf8),
+            DataType::BinaryView => build_array_string!(BinaryViewArray, 
BinaryView),
             DataType::Binary => build_array_string!(BinaryArray, Binary),
             DataType::LargeBinary => build_array_string!(LargeBinaryArray, 
LargeBinary),
             DataType::Date32 => build_array_primitive!(Date32Array, Date32),
@@ -1841,8 +1843,6 @@ impl ScalarValue {
             | DataType::Time64(TimeUnit::Millisecond)
             | DataType::Map(_, _)
             | DataType::RunEndEncoded(_, _)
-            | DataType::Utf8View
-            | DataType::BinaryView
             | DataType::ListView(_)
             | DataType::LargeListView(_) => {
                 return _internal_err!(
@@ -5695,16 +5695,12 @@ mod tests {
             DataType::Dictionary(Box::new(DataType::Int32), 
Box::new(DataType::Utf8)),
         );
 
-        // needs https://github.com/apache/arrow-rs/issues/5893
-        /*
         check_scalar_cast(ScalarValue::Utf8(None), DataType::Utf8View);
         check_scalar_cast(ScalarValue::from("foo"), DataType::Utf8View);
         check_scalar_cast(
             ScalarValue::from("larger than 12 bytes string"),
             DataType::Utf8View,
         );
-
-         */
     }
 
     // mimics how casting work on scalar values by `casting` `scalar` to 
`desired_type`
diff --git a/datafusion/expr/src/type_coercion/binary.rs 
b/datafusion/expr/src/type_coercion/binary.rs
index 4f79f3fa2b..70139aaa4a 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -919,16 +919,21 @@ fn string_concat_internal_coercion(
     }
 }
 
-/// Coercion rules for string types (Utf8/LargeUtf8): If at least one argument 
is
-/// a string type and both arguments can be coerced into a string type, coerce
-/// to string type.
+/// Coercion rules for string view types (Utf8/LargeUtf8/Utf8View):
+/// If at least one argument is a string view, we coerce to string view
+/// based on the observation that StringArray to StringViewArray is cheap but 
not vice versa.
+///
+/// Between Utf8 and LargeUtf8, we coerce to LargeUtf8.
 fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> 
Option<DataType> {
     use arrow::datatypes::DataType::*;
     match (lhs_type, rhs_type) {
+        // If Utf8View is in any side, we coerce to Utf8View.
+        (Utf8View, Utf8View | Utf8 | LargeUtf8) | (Utf8 | LargeUtf8, Utf8View) 
=> {
+            Some(Utf8View)
+        }
+        // Then, if LargeUtf8 is in any side, we coerce to LargeUtf8.
+        (LargeUtf8, Utf8 | LargeUtf8) | (Utf8, LargeUtf8) => Some(LargeUtf8),
         (Utf8, Utf8) => Some(Utf8),
-        (LargeUtf8, Utf8) => Some(LargeUtf8),
-        (Utf8, LargeUtf8) => Some(LargeUtf8),
-        (LargeUtf8, LargeUtf8) => Some(LargeUtf8),
         _ => None,
     }
 }
@@ -975,15 +980,26 @@ fn binary_to_string_coercion(
     }
 }
 
-/// Coercion rules for binary types (Binary/LargeBinary): If at least one 
argument is
+/// Coercion rules for binary types (Binary/LargeBinary/BinaryView): If at 
least one argument is
 /// a binary type and both arguments can be coerced into a binary type, coerce
 /// to binary type.
 fn binary_coercion(lhs_type: &DataType, rhs_type: &DataType) -> 
Option<DataType> {
     use arrow::datatypes::DataType::*;
     match (lhs_type, rhs_type) {
-        (Binary | Utf8, Binary) | (Binary, Utf8) => Some(Binary),
-        (LargeBinary | Binary | Utf8 | LargeUtf8, LargeBinary)
-        | (LargeBinary, Binary | Utf8 | LargeUtf8) => Some(LargeBinary),
+        // If BinaryView is in any side, we coerce to BinaryView.
+        (BinaryView, BinaryView | Binary | LargeBinary | Utf8 | LargeUtf8 | 
Utf8View)
+        | (LargeBinary | Binary | Utf8 | LargeUtf8 | Utf8View, BinaryView) => {
+            Some(BinaryView)
+        }
+        // Prefer LargeBinary over Binary
+        (LargeBinary | Binary | Utf8 | LargeUtf8 | Utf8View, LargeBinary)
+        | (LargeBinary, Binary | Utf8 | LargeUtf8 | Utf8View) => 
Some(LargeBinary),
+
+        // If Utf8View/LargeUtf8 presents need to be large Binary
+        (Utf8View | LargeUtf8, Binary) | (Binary, Utf8View | LargeUtf8) => {
+            Some(LargeBinary)
+        }
+        (Binary, Utf8) | (Utf8, Binary) => Some(Binary),
         _ => None,
     }
 }
diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs 
b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs
index 9941da9dd6..7238dd5bbd 100644
--- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs
+++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs
@@ -33,7 +33,7 @@ use datafusion_common::tree_node::{Transformed, TreeNode, 
TreeNodeRewriter};
 use datafusion_common::{internal_err, DFSchema, DFSchemaRef, Result, 
ScalarValue};
 use datafusion_expr::expr::{BinaryExpr, Cast, InList, TryCast};
 use datafusion_expr::utils::merge_schema;
-use datafusion_expr::{lit, Expr, ExprSchemable, LogicalPlan, Operator};
+use datafusion_expr::{lit, Expr, ExprSchemable, LogicalPlan};
 
 /// [`UnwrapCastInComparison`] attempts to remove casts from
 /// comparisons to literals ([`ScalarValue`]s) by applying the casts
@@ -146,7 +146,7 @@ impl TreeNodeRewriter for UnwrapCastExprRewriter {
                     };
                     is_supported_type(&left_type)
                         && is_supported_type(&right_type)
-                        && is_comparison_op(op)
+                        && op.is_comparison_operator()
                 } =>
             {
                 match (left.as_mut(), right.as_mut()) {
@@ -262,18 +262,6 @@ impl TreeNodeRewriter for UnwrapCastExprRewriter {
     }
 }
 
-fn is_comparison_op(op: &Operator) -> bool {
-    matches!(
-        op,
-        Operator::Eq
-            | Operator::NotEq
-            | Operator::Gt
-            | Operator::GtEq
-            | Operator::Lt
-            | Operator::LtEq
-    )
-}
-
 /// Returns true if [UnwrapCastExprRewriter] supports this data type
 fn is_supported_type(data_type: &DataType) -> bool {
     is_supported_numeric_type(data_type)
@@ -300,7 +288,10 @@ fn is_supported_numeric_type(data_type: &DataType) -> bool 
{
 
 /// Returns true if [UnwrapCastExprRewriter] supports casting this value as a 
string
 fn is_supported_string_type(data_type: &DataType) -> bool {
-    matches!(data_type, DataType::Utf8 | DataType::LargeUtf8)
+    matches!(
+        data_type,
+        DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View
+    )
 }
 
 /// Returns true if [UnwrapCastExprRewriter] supports casting this value as a 
dictionary
@@ -473,12 +464,15 @@ fn try_cast_string_literal(
     target_type: &DataType,
 ) -> Option<ScalarValue> {
     let string_value = match lit_value {
-        ScalarValue::Utf8(s) | ScalarValue::LargeUtf8(s) => s.clone(),
+        ScalarValue::Utf8(s) | ScalarValue::LargeUtf8(s) | 
ScalarValue::Utf8View(s) => {
+            s.clone()
+        }
         _ => return None,
     };
     let scalar_value = match target_type {
         DataType::Utf8 => ScalarValue::Utf8(string_value),
         DataType::LargeUtf8 => ScalarValue::LargeUtf8(string_value),
+        DataType::Utf8View => ScalarValue::Utf8View(string_value),
         _ => return None,
     };
     Some(scalar_value)
diff --git a/datafusion/sqllogictest/test_files/binary_view.slt 
b/datafusion/sqllogictest/test_files/binary_view.slt
new file mode 100644
index 0000000000..de0f0bea7f
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/binary_view.slt
@@ -0,0 +1,202 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+########
+## Test setup
+########
+
+statement ok
+create table test_source as values
+    ('Andrew', 'X'),
+    ('Xiangpeng', 'Xiangpeng'),
+    ('Raphael', 'R'),
+    (NULL, 'R')
+;
+
+# Table with the different combination of column types
+statement ok
+CREATE TABLE test AS
+SELECT
+  arrow_cast(column1, 'Utf8') as column1_utf8,
+  arrow_cast(column2, 'Utf8') as column2_utf8,
+  arrow_cast(column1, 'Binary') AS column1_binary,
+  arrow_cast(column2, 'Binary') AS column2_binary,
+  arrow_cast(column1, 'LargeBinary') AS column1_large_binary,
+  arrow_cast(column2, 'LargeBinary') AS column2_large_binary,
+  arrow_cast(arrow_cast(column1, 'Binary'), 'BinaryView') AS 
column1_binaryview,
+  arrow_cast(arrow_cast(column2, 'Binary'), 'BinaryView') AS 
column2_binaryview,
+  arrow_cast(column1, 'Dictionary(Int32, Binary)') AS column1_dict,
+  arrow_cast(column2, 'Dictionary(Int32, Binary)') AS column2_dict
+FROM test_source;
+
+statement ok
+drop table test_source
+
+########
+## BinaryView to BinaryView
+########
+
+# BinaryView scalar to BinaryView scalar
+
+query BBBB
+SELECT
+  arrow_cast(arrow_cast('NULL', 'Binary'), 'BinaryView') = 
arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison1,
+  arrow_cast(arrow_cast('NULL', 'Binary'), 'BinaryView') <> 
arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison2,
+  arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = 
arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison3,
+  arrow_cast(arrow_cast('Xiangpeng', 'Binary'), 'BinaryView') <> 
arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison4;
+----
+false true true true
+
+
+# BinaryView column to BinaryView column comparison as filters
+
+query TT
+select column1_utf8, column2_utf8 from test where column1_binaryview = 
column2_binaryview;
+----
+Xiangpeng Xiangpeng
+
+query TT
+select column1_utf8, column2_utf8 from test where column1_binaryview <> 
column2_binaryview;
+----
+Andrew X
+Raphael R
+
+# BinaryView column to BinaryView column
+query TTBB
+select
+  column1_utf8, column2_utf8,
+  column1_binaryview = column2_binaryview,
+  column1_binaryview <> column2_binaryview
+from test;
+----
+Andrew X false true
+Xiangpeng Xiangpeng true false
+Raphael R false true
+NULL R NULL NULL
+
+# BinaryView column to BinaryView scalar comparison
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_binaryview                 = arrow_cast(arrow_cast('Andrew', 
'Binary'), 'BinaryView'),
+  arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = 
column1_binaryview,
+  column1_binaryview                 <> arrow_cast(arrow_cast('Andrew', 
'Binary'), 'BinaryView'),
+  arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') <> 
column1_binaryview
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+########
+## BinaryView to Binary
+########
+
+# test BinaryViewArray with Binary columns
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_binaryview  = column2_binary,
+  column2_binary      = column1_binaryview,
+  column1_binaryview <> column2_binary,
+  column2_binary     <> column1_binaryview
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# test BinaryViewArray with LargeBinary columns
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_binaryview  = column2_large_binary,
+  column2_large_binary      = column1_binaryview,
+  column1_binaryview <> column2_large_binary,
+  column2_large_binary     <> column1_binaryview
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# BinaryView column to Binary scalar
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_binaryview                 = arrow_cast('Andrew', 'Binary'),
+  arrow_cast('Andrew', 'Binary')     = column1_binaryview,
+  column1_binaryview                <> arrow_cast('Andrew', 'Binary'),
+  arrow_cast('Andrew', 'Binary')     <> column1_binaryview
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# BinaryView column to LargeBinary scalar
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_binaryview                 = arrow_cast('Andrew', 'LargeBinary'),
+  arrow_cast('Andrew', 'LargeBinary')     = column1_binaryview,
+  column1_binaryview                <> arrow_cast('Andrew', 'LargeBinary'),
+  arrow_cast('Andrew', 'LargeBinary')     <> column1_binaryview
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# Binary column to BinaryView scalar
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_binary                     = arrow_cast(arrow_cast('Andrew', 
'Binary'), 'BinaryView'),
+  arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = column1_binary,
+  column1_binary                     <> arrow_cast(arrow_cast('Andrew', 
'Binary'), 'BinaryView'),
+  arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') <> column1_binary
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+
+# LargeBinary column to BinaryView scalar
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_large_binary                     = arrow_cast(arrow_cast('Andrew', 
'Binary'), 'BinaryView'),
+  arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = 
column1_large_binary,
+  column1_large_binary                     <> arrow_cast(arrow_cast('Andrew', 
'Binary'), 'BinaryView'),
+  arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') <> 
column1_large_binary
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+statement ok
+drop table test;
\ No newline at end of file
diff --git a/datafusion/sqllogictest/test_files/string_view.slt 
b/datafusion/sqllogictest/test_files/string_view.slt
new file mode 100644
index 0000000000..3ba4e271c2
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/string_view.slt
@@ -0,0 +1,326 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+########
+## Test setup
+########
+
+statement ok
+create table test_source as values
+    ('Andrew', 'X'),
+    ('Xiangpeng', 'Xiangpeng'),
+    ('Raphael', 'R'),
+    (NULL, 'R')
+;
+
+# Table with the different combination of column types
+statement ok
+create table test as
+SELECT
+  arrow_cast(column1, 'Utf8') as column1_utf8,
+  arrow_cast(column2, 'Utf8') as column2_utf8,
+  arrow_cast(column1, 'LargeUtf8') as column1_large_utf8,
+  arrow_cast(column2, 'LargeUtf8') as column2_large_utf8,
+  arrow_cast(column1, 'Utf8View') as column1_utf8view,
+  arrow_cast(column2, 'Utf8View') as column2_utf8view,
+  arrow_cast(column1, 'Dictionary(Int32, Utf8)') as column1_dict,
+  arrow_cast(column2, 'Dictionary(Int32, Utf8)') as column2_dict
+FROM test_source;
+
+statement ok
+drop table test_source
+
+########
+## StringView to StringView
+########
+
+# StringView scalar to StringView scalar
+
+query BBBB
+select
+  arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
+----
+false true true true
+
+
+# StringView column to StringView column comparison as filters
+
+query TT
+select column1_utf8, column2_utf8 from test where column1_utf8view = 
column2_utf8view;
+----
+Xiangpeng Xiangpeng
+
+query TT
+select column1_utf8, column2_utf8 from test where column1_utf8view <> 
column2_utf8view;
+----
+Andrew X
+Raphael R
+
+# StringView column to StringView column
+query TTBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view = column2_utf8view,
+  column1_utf8view <> column2_utf8view
+from test;
+----
+Andrew X false true
+Xiangpeng Xiangpeng true false
+Raphael R false true
+NULL R NULL NULL
+
+# StringView column to StringView scalar comparison
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view                 = arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') = column1_utf8view,
+  column1_utf8view                 <> arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') <> column1_utf8view
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+########
+## StringView to String
+########
+
+# test StringViewArray with Utf8 columns
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view  = column2_utf8,
+  column2_utf8      = column1_utf8view,
+  column1_utf8view <> column2_utf8,
+  column2_utf8     <> column1_utf8view
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# test StringViewArray with LargeUtf8 columns
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view  = column2_large_utf8,
+  column2_large_utf8      = column1_utf8view,
+  column1_utf8view <> column2_large_utf8,
+  column2_large_utf8     <> column1_utf8view
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+
+# StringView column to String scalar
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view                 = arrow_cast('Andrew', 'Utf8'),
+  arrow_cast('Andrew', 'Utf8')     = column1_utf8view,
+  column1_utf8view                 <> arrow_cast('Andrew', 'Utf8'),
+  arrow_cast('Andrew', 'Utf8')     <> column1_utf8view
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# StringView column to LargeString scalar
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view                 = arrow_cast('Andrew', 'LargeUtf8'),
+  arrow_cast('Andrew', 'LargeUtf8')     = column1_utf8view,
+  column1_utf8view                 <> arrow_cast('Andrew', 'LargeUtf8'),
+  arrow_cast('Andrew', 'LargeUtf8')     <> column1_utf8view
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# String column to StringView scalar
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8                     = arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') = column1_utf8,
+  column1_utf8                    <> arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') <> column1_utf8
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# LargeString column to StringView scalar
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_large_utf8                     = arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') = column1_large_utf8,
+  column1_large_utf8                    <> arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') <> column1_large_utf8
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+########
+## StringView to Dictionary
+########
+
+# test StringViewArray with Dictionary columns
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view  = column2_dict,
+  column2_dict      = column1_utf8view,
+  column1_utf8view <> column2_dict,
+  column2_dict     <> column1_utf8view
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# StringView column to Dict scalar
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view                 = arrow_cast('Andrew', 'Dictionary(Int32, 
Utf8)'),
+  arrow_cast('Andrew', 'Dictionary(Int32, Utf8)')     = column1_utf8view,
+  column1_utf8view                 <> arrow_cast('Andrew', 'Dictionary(Int32, 
Utf8)'),
+  arrow_cast('Andrew', 'Dictionary(Int32, Utf8)')     <> column1_utf8view
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# Dict column to StringView scalar
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_dict                     = arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') = column1_dict,
+  column1_dict                    <> arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') <> column1_dict
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+
+########
+## Coercion Rules
+########
+
+
+statement ok
+set datafusion.explain.logical_plan_only = true;
+
+
+# Filter should have a StringView literal and no column cast
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view = 'Andrew';
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+# reverse order should be the same
+query TT
+explain SELECT column1_utf8 from test where 'Andrew' = column1_utf8view;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+query TT
+explain SELECT column1_utf8 from test where column1_utf8 = 
arrow_cast('Andrew', 'Utf8View');
+----
+logical_plan
+01)Filter: test.column1_utf8 = Utf8("Andrew")
+02)--TableScan: test projection=[column1_utf8]
+
+query TT
+explain SELECT column1_utf8 from test where arrow_cast('Andrew', 'Utf8View') = 
column1_utf8;
+----
+logical_plan
+01)Filter: test.column1_utf8 = Utf8("Andrew")
+02)--TableScan: test projection=[column1_utf8]
+
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view = 
arrow_cast('Andrew', 'Dictionary(Int32, Utf8)');
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+query TT
+explain SELECT column1_utf8 from test where arrow_cast('Andrew', 
'Dictionary(Int32, Utf8)') = column1_utf8view;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+# compare string / stringview
+# Should cast string -> stringview (which is cheap), not stringview -> string 
(which is not)
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view = column2_utf8;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = CAST(test.column2_utf8 AS Utf8View)
+03)----TableScan: test projection=[column1_utf8, column2_utf8, 
column1_utf8view]
+
+query TT
+explain SELECT column1_utf8 from test where column2_utf8 = column1_utf8view;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: CAST(test.column2_utf8 AS Utf8View) = test.column1_utf8view
+03)----TableScan: test projection=[column1_utf8, column2_utf8, 
column1_utf8view]
+
+
+statement ok
+drop table test;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to