This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new ccb4baf0fc Initial support for `StringView`, merge changes from
`string-view` development branch (#11402)
ccb4baf0fc is described below
commit ccb4baf0fc6b4dee983bb29f2282b9c19510a481
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Jul 16 15:52:35 2024 -0400
Initial support for `StringView`, merge changes from `string-view`
development branch (#11402)
* Update `string-view` branch to arrow-rs main (#10966)
* Pin to arrow main
* Fix clippy with latest arrow
* Uncomment test that needs new arrow-rs to work
* Update datafusion-cli Cargo.lock
* Update Cargo.lock
* taplo
* feat: Implement equality = and inequality <> support for StringView
(#10985)
* feat: Implement equality = and inequality <> support for StringView
* chore: Add tests for the StringView
* chore
* chore: Update tests for NULL
* fix: Used build_array_string!
* chore: Update string_coercion function to handle Utf8View type in
binary.rs
* chore: add tests
* chore: ci
* Add more StringView comparison test coverage (#10997)
* Add more StringView comparison test coverage
* add reference
* Add another test showing casting on columns works correctly
* feat: Implement equality = and inequality <> support for BinaryView
(#11004)
* feat: Implement equality = and inequality <> support for BinaryView
Signed-off-by: Chojan Shang <[email protected]>
* chore: make fmt happy
Signed-off-by: Chojan Shang <[email protected]>
---------
Signed-off-by: Chojan Shang <[email protected]>
* Implement support for LargeString and LargeBinary for StringView and
BinaryView (#11034)
* implement large binary
* add tests for large string
* better comments for string coercion
* Improve filter predicates with `Utf8View` literals (#11043)
* refactor: Improve type coercion logic in TypeCoercionRewriter
* refactor: Improve type coercion logic in TypeCoercionRewriter
* chore
* chore: Update test
* refactor: Improve type coercion logic in TypeCoercionRewriter
* refactor: Remove unused import and update code formatting in
unwrap_cast_in_comparison.rs
* Remove arrow-patch
---------
Signed-off-by: Chojan Shang <[email protected]>
Co-authored-by: Alex Huang <[email protected]>
Co-authored-by: Chojan Shang <[email protected]>
Co-authored-by: Xiangpeng Hao <[email protected]>
---
datafusion/common/src/scalar/mod.rs | 8 +-
datafusion/expr/src/type_coercion/binary.rs | 36 ++-
.../optimizer/src/unwrap_cast_in_comparison.rs | 26 +-
datafusion/sqllogictest/test_files/binary_view.slt | 202 +++++++++++++
datafusion/sqllogictest/test_files/string_view.slt | 326 +++++++++++++++++++++
5 files changed, 566 insertions(+), 32 deletions(-)
diff --git a/datafusion/common/src/scalar/mod.rs
b/datafusion/common/src/scalar/mod.rs
index c891e85aa5..38f70e4c14 100644
--- a/datafusion/common/src/scalar/mod.rs
+++ b/datafusion/common/src/scalar/mod.rs
@@ -1682,8 +1682,10 @@ impl ScalarValue {
DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
+ DataType::Utf8View => build_array_string!(StringViewArray,
Utf8View),
DataType::Utf8 => build_array_string!(StringArray, Utf8),
DataType::LargeUtf8 => build_array_string!(LargeStringArray,
LargeUtf8),
+ DataType::BinaryView => build_array_string!(BinaryViewArray,
BinaryView),
DataType::Binary => build_array_string!(BinaryArray, Binary),
DataType::LargeBinary => build_array_string!(LargeBinaryArray,
LargeBinary),
DataType::Date32 => build_array_primitive!(Date32Array, Date32),
@@ -1841,8 +1843,6 @@ impl ScalarValue {
| DataType::Time64(TimeUnit::Millisecond)
| DataType::Map(_, _)
| DataType::RunEndEncoded(_, _)
- | DataType::Utf8View
- | DataType::BinaryView
| DataType::ListView(_)
| DataType::LargeListView(_) => {
return _internal_err!(
@@ -5695,16 +5695,12 @@ mod tests {
DataType::Dictionary(Box::new(DataType::Int32),
Box::new(DataType::Utf8)),
);
- // needs https://github.com/apache/arrow-rs/issues/5893
- /*
check_scalar_cast(ScalarValue::Utf8(None), DataType::Utf8View);
check_scalar_cast(ScalarValue::from("foo"), DataType::Utf8View);
check_scalar_cast(
ScalarValue::from("larger than 12 bytes string"),
DataType::Utf8View,
);
-
- */
}
// mimics how casting work on scalar values by `casting` `scalar` to
`desired_type`
diff --git a/datafusion/expr/src/type_coercion/binary.rs
b/datafusion/expr/src/type_coercion/binary.rs
index 4f79f3fa2b..70139aaa4a 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -919,16 +919,21 @@ fn string_concat_internal_coercion(
}
}
-/// Coercion rules for string types (Utf8/LargeUtf8): If at least one argument
is
-/// a string type and both arguments can be coerced into a string type, coerce
-/// to string type.
+/// Coercion rules for string types (Utf8/LargeUtf8/Utf8View):
+/// If at least one argument is a string view, we coerce to string view
+/// based on the observation that StringArray to StringViewArray is cheap but
not vice versa.
+///
+/// Between Utf8 and LargeUtf8, we coerce to LargeUtf8.
fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
+ // If Utf8View is in any side, we coerce to Utf8View.
+ (Utf8View, Utf8View | Utf8 | LargeUtf8) | (Utf8 | LargeUtf8, Utf8View)
=> {
+ Some(Utf8View)
+ }
+ // Then, if LargeUtf8 is in any side, we coerce to LargeUtf8.
+ (LargeUtf8, Utf8 | LargeUtf8) | (Utf8, LargeUtf8) => Some(LargeUtf8),
(Utf8, Utf8) => Some(Utf8),
- (LargeUtf8, Utf8) => Some(LargeUtf8),
- (Utf8, LargeUtf8) => Some(LargeUtf8),
- (LargeUtf8, LargeUtf8) => Some(LargeUtf8),
_ => None,
}
}
@@ -975,15 +980,26 @@ fn binary_to_string_coercion(
}
}
-/// Coercion rules for binary types (Binary/LargeBinary): If at least one
argument is
+/// Coercion rules for binary types (Binary/LargeBinary/BinaryView): If at
least one argument is
/// a binary type and both arguments can be coerced into a binary type, coerce
/// to binary type.
fn binary_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
- (Binary | Utf8, Binary) | (Binary, Utf8) => Some(Binary),
- (LargeBinary | Binary | Utf8 | LargeUtf8, LargeBinary)
- | (LargeBinary, Binary | Utf8 | LargeUtf8) => Some(LargeBinary),
+ // If BinaryView is in any side, we coerce to BinaryView.
+ (BinaryView, BinaryView | Binary | LargeBinary | Utf8 | LargeUtf8 |
Utf8View)
+ | (LargeBinary | Binary | Utf8 | LargeUtf8 | Utf8View, BinaryView) => {
+ Some(BinaryView)
+ }
+ // Prefer LargeBinary over Binary
+ (LargeBinary | Binary | Utf8 | LargeUtf8 | Utf8View, LargeBinary)
+ | (LargeBinary, Binary | Utf8 | LargeUtf8 | Utf8View) =>
Some(LargeBinary),
+
+ // If Utf8View/LargeUtf8 is paired with Binary, coerce to LargeBinary
+ (Utf8View | LargeUtf8, Binary) | (Binary, Utf8View | LargeUtf8) => {
+ Some(LargeBinary)
+ }
+ (Binary, Utf8) | (Utf8, Binary) => Some(Binary),
_ => None,
}
}
diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs
b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs
index 9941da9dd6..7238dd5bbd 100644
--- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs
+++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs
@@ -33,7 +33,7 @@ use datafusion_common::tree_node::{Transformed, TreeNode,
TreeNodeRewriter};
use datafusion_common::{internal_err, DFSchema, DFSchemaRef, Result,
ScalarValue};
use datafusion_expr::expr::{BinaryExpr, Cast, InList, TryCast};
use datafusion_expr::utils::merge_schema;
-use datafusion_expr::{lit, Expr, ExprSchemable, LogicalPlan, Operator};
+use datafusion_expr::{lit, Expr, ExprSchemable, LogicalPlan};
/// [`UnwrapCastInComparison`] attempts to remove casts from
/// comparisons to literals ([`ScalarValue`]s) by applying the casts
@@ -146,7 +146,7 @@ impl TreeNodeRewriter for UnwrapCastExprRewriter {
};
is_supported_type(&left_type)
&& is_supported_type(&right_type)
- && is_comparison_op(op)
+ && op.is_comparison_operator()
} =>
{
match (left.as_mut(), right.as_mut()) {
@@ -262,18 +262,6 @@ impl TreeNodeRewriter for UnwrapCastExprRewriter {
}
}
-fn is_comparison_op(op: &Operator) -> bool {
- matches!(
- op,
- Operator::Eq
- | Operator::NotEq
- | Operator::Gt
- | Operator::GtEq
- | Operator::Lt
- | Operator::LtEq
- )
-}
-
/// Returns true if [UnwrapCastExprRewriter] supports this data type
fn is_supported_type(data_type: &DataType) -> bool {
is_supported_numeric_type(data_type)
@@ -300,7 +288,10 @@ fn is_supported_numeric_type(data_type: &DataType) -> bool
{
/// Returns true if [UnwrapCastExprRewriter] supports casting this value as a
string
fn is_supported_string_type(data_type: &DataType) -> bool {
- matches!(data_type, DataType::Utf8 | DataType::LargeUtf8)
+ matches!(
+ data_type,
+ DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View
+ )
}
/// Returns true if [UnwrapCastExprRewriter] supports casting this value as a
dictionary
@@ -473,12 +464,15 @@ fn try_cast_string_literal(
target_type: &DataType,
) -> Option<ScalarValue> {
let string_value = match lit_value {
- ScalarValue::Utf8(s) | ScalarValue::LargeUtf8(s) => s.clone(),
+ ScalarValue::Utf8(s) | ScalarValue::LargeUtf8(s) |
ScalarValue::Utf8View(s) => {
+ s.clone()
+ }
_ => return None,
};
let scalar_value = match target_type {
DataType::Utf8 => ScalarValue::Utf8(string_value),
DataType::LargeUtf8 => ScalarValue::LargeUtf8(string_value),
+ DataType::Utf8View => ScalarValue::Utf8View(string_value),
_ => return None,
};
Some(scalar_value)
diff --git a/datafusion/sqllogictest/test_files/binary_view.slt
b/datafusion/sqllogictest/test_files/binary_view.slt
new file mode 100644
index 0000000000..de0f0bea7f
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/binary_view.slt
@@ -0,0 +1,202 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+########
+## Test setup
+########
+
+statement ok
+create table test_source as values
+ ('Andrew', 'X'),
+ ('Xiangpeng', 'Xiangpeng'),
+ ('Raphael', 'R'),
+ (NULL, 'R')
+;
+
+# Table with the different combination of column types
+statement ok
+CREATE TABLE test AS
+SELECT
+ arrow_cast(column1, 'Utf8') as column1_utf8,
+ arrow_cast(column2, 'Utf8') as column2_utf8,
+ arrow_cast(column1, 'Binary') AS column1_binary,
+ arrow_cast(column2, 'Binary') AS column2_binary,
+ arrow_cast(column1, 'LargeBinary') AS column1_large_binary,
+ arrow_cast(column2, 'LargeBinary') AS column2_large_binary,
+ arrow_cast(arrow_cast(column1, 'Binary'), 'BinaryView') AS
column1_binaryview,
+ arrow_cast(arrow_cast(column2, 'Binary'), 'BinaryView') AS
column2_binaryview,
+ arrow_cast(column1, 'Dictionary(Int32, Binary)') AS column1_dict,
+ arrow_cast(column2, 'Dictionary(Int32, Binary)') AS column2_dict
+FROM test_source;
+
+statement ok
+drop table test_source
+
+########
+## BinaryView to BinaryView
+########
+
+# BinaryView scalar to BinaryView scalar
+
+query BBBB
+SELECT
+ arrow_cast(arrow_cast('NULL', 'Binary'), 'BinaryView') =
arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison1,
+ arrow_cast(arrow_cast('NULL', 'Binary'), 'BinaryView') <>
arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison2,
+ arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') =
arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison3,
+ arrow_cast(arrow_cast('Xiangpeng', 'Binary'), 'BinaryView') <>
arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison4;
+----
+false true true true
+
+
+# BinaryView column to BinaryView column comparison as filters
+
+query TT
+select column1_utf8, column2_utf8 from test where column1_binaryview =
column2_binaryview;
+----
+Xiangpeng Xiangpeng
+
+query TT
+select column1_utf8, column2_utf8 from test where column1_binaryview <>
column2_binaryview;
+----
+Andrew X
+Raphael R
+
+# BinaryView column to BinaryView column
+query TTBB
+select
+ column1_utf8, column2_utf8,
+ column1_binaryview = column2_binaryview,
+ column1_binaryview <> column2_binaryview
+from test;
+----
+Andrew X false true
+Xiangpeng Xiangpeng true false
+Raphael R false true
+NULL R NULL NULL
+
+# BinaryView column to BinaryView scalar comparison
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_binaryview = arrow_cast(arrow_cast('Andrew',
'Binary'), 'BinaryView'),
+ arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') =
column1_binaryview,
+ column1_binaryview <> arrow_cast(arrow_cast('Andrew',
'Binary'), 'BinaryView'),
+ arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') <>
column1_binaryview
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+########
+## BinaryView to Binary
+########
+
+# test BinaryViewArray with Binary columns
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_binaryview = column2_binary,
+ column2_binary = column1_binaryview,
+ column1_binaryview <> column2_binary,
+ column2_binary <> column1_binaryview
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# test BinaryViewArray with LargeBinary columns
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_binaryview = column2_large_binary,
+ column2_large_binary = column1_binaryview,
+ column1_binaryview <> column2_large_binary,
+ column2_large_binary <> column1_binaryview
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# BinaryView column to Binary scalar
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_binaryview = arrow_cast('Andrew', 'Binary'),
+ arrow_cast('Andrew', 'Binary') = column1_binaryview,
+ column1_binaryview <> arrow_cast('Andrew', 'Binary'),
+ arrow_cast('Andrew', 'Binary') <> column1_binaryview
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# BinaryView column to LargeBinary scalar
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_binaryview = arrow_cast('Andrew', 'LargeBinary'),
+ arrow_cast('Andrew', 'LargeBinary') = column1_binaryview,
+ column1_binaryview <> arrow_cast('Andrew', 'LargeBinary'),
+ arrow_cast('Andrew', 'LargeBinary') <> column1_binaryview
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# Binary column to BinaryView scalar
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_binary = arrow_cast(arrow_cast('Andrew',
'Binary'), 'BinaryView'),
+ arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = column1_binary,
+ column1_binary <> arrow_cast(arrow_cast('Andrew',
'Binary'), 'BinaryView'),
+ arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') <> column1_binary
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+
+# LargeBinary column to BinaryView scalar
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_large_binary = arrow_cast(arrow_cast('Andrew',
'Binary'), 'BinaryView'),
+ arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') =
column1_large_binary,
+ column1_large_binary <> arrow_cast(arrow_cast('Andrew',
'Binary'), 'BinaryView'),
+ arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') <>
column1_large_binary
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+statement ok
+drop table test;
\ No newline at end of file
diff --git a/datafusion/sqllogictest/test_files/string_view.slt
b/datafusion/sqllogictest/test_files/string_view.slt
new file mode 100644
index 0000000000..3ba4e271c2
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/string_view.slt
@@ -0,0 +1,326 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+########
+## Test setup
+########
+
+statement ok
+create table test_source as values
+ ('Andrew', 'X'),
+ ('Xiangpeng', 'Xiangpeng'),
+ ('Raphael', 'R'),
+ (NULL, 'R')
+;
+
+# Table with the different combination of column types
+statement ok
+create table test as
+SELECT
+ arrow_cast(column1, 'Utf8') as column1_utf8,
+ arrow_cast(column2, 'Utf8') as column2_utf8,
+ arrow_cast(column1, 'LargeUtf8') as column1_large_utf8,
+ arrow_cast(column2, 'LargeUtf8') as column2_large_utf8,
+ arrow_cast(column1, 'Utf8View') as column1_utf8view,
+ arrow_cast(column2, 'Utf8View') as column2_utf8view,
+ arrow_cast(column1, 'Dictionary(Int32, Utf8)') as column1_dict,
+ arrow_cast(column2, 'Dictionary(Int32, Utf8)') as column2_dict
+FROM test_source;
+
+statement ok
+drop table test_source
+
+########
+## StringView to StringView
+########
+
+# StringView scalar to StringView scalar
+
+query BBBB
+select
+ arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
+----
+false true true true
+
+
+# StringView column to StringView column comparison as filters
+
+query TT
+select column1_utf8, column2_utf8 from test where column1_utf8view =
column2_utf8view;
+----
+Xiangpeng Xiangpeng
+
+query TT
+select column1_utf8, column2_utf8 from test where column1_utf8view <>
column2_utf8view;
+----
+Andrew X
+Raphael R
+
+# StringView column to StringView column
+query TTBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = column2_utf8view,
+ column1_utf8view <> column2_utf8view
+from test;
+----
+Andrew X false true
+Xiangpeng Xiangpeng true false
+Raphael R false true
+NULL R NULL NULL
+
+# StringView column to StringView scalar comparison
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') = column1_utf8view,
+ column1_utf8view <> arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') <> column1_utf8view
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+########
+## StringView to String
+########
+
+# test StringViewArray with Utf8 columns
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = column2_utf8,
+ column2_utf8 = column1_utf8view,
+ column1_utf8view <> column2_utf8,
+ column2_utf8 <> column1_utf8view
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# test StringViewArray with LargeUtf8 columns
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = column2_large_utf8,
+ column2_large_utf8 = column1_utf8view,
+ column1_utf8view <> column2_large_utf8,
+ column2_large_utf8 <> column1_utf8view
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+
+# StringView column to String scalar
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = arrow_cast('Andrew', 'Utf8'),
+ arrow_cast('Andrew', 'Utf8') = column1_utf8view,
+ column1_utf8view <> arrow_cast('Andrew', 'Utf8'),
+ arrow_cast('Andrew', 'Utf8') <> column1_utf8view
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# StringView column to LargeString scalar
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = arrow_cast('Andrew', 'LargeUtf8'),
+ arrow_cast('Andrew', 'LargeUtf8') = column1_utf8view,
+ column1_utf8view <> arrow_cast('Andrew', 'LargeUtf8'),
+ arrow_cast('Andrew', 'LargeUtf8') <> column1_utf8view
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# String column to StringView scalar
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8 = arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') = column1_utf8,
+ column1_utf8 <> arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') <> column1_utf8
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# LargeString column to StringView scalar
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_large_utf8 = arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') = column1_large_utf8,
+ column1_large_utf8 <> arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') <> column1_large_utf8
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+########
+## StringView to Dictionary
+########
+
+# test StringViewArray with Dictionary columns
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = column2_dict,
+ column2_dict = column1_utf8view,
+ column1_utf8view <> column2_dict,
+ column2_dict <> column1_utf8view
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# StringView column to Dict scalar
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_utf8view = arrow_cast('Andrew', 'Dictionary(Int32,
Utf8)'),
+ arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') = column1_utf8view,
+ column1_utf8view <> arrow_cast('Andrew', 'Dictionary(Int32,
Utf8)'),
+ arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') <> column1_utf8view
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# Dict column to StringView scalar
+query TTBBBB
+select
+ column1_utf8, column2_utf8,
+ column1_dict = arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') = column1_dict,
+ column1_dict <> arrow_cast('Andrew', 'Utf8View'),
+ arrow_cast('Andrew', 'Utf8View') <> column1_dict
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+
+########
+## Coercion Rules
+########
+
+
+statement ok
+set datafusion.explain.logical_plan_only = true;
+
+
+# Filter should have a StringView literal and no column cast
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view = 'Andrew';
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+# reverse order should be the same
+query TT
+explain SELECT column1_utf8 from test where 'Andrew' = column1_utf8view;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+query TT
+explain SELECT column1_utf8 from test where column1_utf8 =
arrow_cast('Andrew', 'Utf8View');
+----
+logical_plan
+01)Filter: test.column1_utf8 = Utf8("Andrew")
+02)--TableScan: test projection=[column1_utf8]
+
+query TT
+explain SELECT column1_utf8 from test where arrow_cast('Andrew', 'Utf8View') =
column1_utf8;
+----
+logical_plan
+01)Filter: test.column1_utf8 = Utf8("Andrew")
+02)--TableScan: test projection=[column1_utf8]
+
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view =
arrow_cast('Andrew', 'Dictionary(Int32, Utf8)');
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+query TT
+explain SELECT column1_utf8 from test where arrow_cast('Andrew',
'Dictionary(Int32, Utf8)') = column1_utf8view;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+# compare string / stringview
+# Should cast string -> stringview (which is cheap), not stringview -> string
(which is not)
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view = column2_utf8;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = CAST(test.column2_utf8 AS Utf8View)
+03)----TableScan: test projection=[column1_utf8, column2_utf8,
column1_utf8view]
+
+query TT
+explain SELECT column1_utf8 from test where column2_utf8 = column1_utf8view;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: CAST(test.column2_utf8 AS Utf8View) = test.column1_utf8view
+03)----TableScan: test projection=[column1_utf8, column2_utf8,
column1_utf8view]
+
+
+statement ok
+drop table test;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]