This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch string-view
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/string-view by this push:
     new 507d978a3b feat: Implement equality = and inequality <> support for StringView (#10985)
507d978a3b is described below

commit 507d978a3b2b9fe873239ae2d4640286e423086a
Author: Alex Huang <[email protected]>
AuthorDate: Wed Jun 19 19:38:03 2024 +0800

    feat: Implement equality = and inequality <> support for StringView (#10985)
    
    * feat: Implement equality = and inequality <> support for StringView
    
    * chore: Add tests for the StringView
    
    * chore
    
    * chore: Update tests for NULL
    
    * fix: Used build_array_string!
    
    * chore: Update string_coercion function to handle Utf8View type in binary.rs
    
    * chore: add tests
    
    * chore: ci
---
 Cargo.toml                                         |  24 ++---
 datafusion-cli/Cargo.lock                          |  30 +++---
 datafusion-cli/Cargo.toml                          |  22 ++--
 datafusion/common/src/scalar/mod.rs                |   2 +-
 datafusion/expr/src/type_coercion/binary.rs        |   1 +
 datafusion/sqllogictest/test_files/string_view.slt | 113 +++++++++++++++++++++
 6 files changed, 153 insertions(+), 39 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 290dd64021..be6e0c672f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -157,15 +157,15 @@ unused_imports = "deny"
 ## Temporary arrow-rs patch until 52.1.0 is released
 
 [patch.crates-io]
-arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
+arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index b0b41a1232..15f7809ee5 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -131,7 +131,7 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
 [[package]]
 name = "arrow"
 version = "52.0.0"
-source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
 dependencies = [
  "arrow-arith",
  "arrow-array",
@@ -151,7 +151,7 @@ dependencies = [
 [[package]]
 name = "arrow-arith"
 version = "52.0.0"
-source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -165,7 +165,7 @@ dependencies = [
 [[package]]
 name = "arrow-array"
 version = "52.0.0"
-source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
 dependencies = [
  "ahash",
  "arrow-buffer",
@@ -181,7 +181,7 @@ dependencies = [
 [[package]]
 name = "arrow-buffer"
 version = "52.0.0"
-source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
 dependencies = [
  "bytes",
  "half",
@@ -191,7 +191,7 @@ dependencies = [
 [[package]]
 name = "arrow-cast"
 version = "52.0.0"
-source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -211,7 +211,7 @@ dependencies = [
 [[package]]
 name = "arrow-csv"
 version = "52.0.0"
-source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -229,7 +229,7 @@ dependencies = [
 [[package]]
 name = "arrow-data"
 version = "52.0.0"
-source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
 dependencies = [
  "arrow-buffer",
  "arrow-schema",
@@ -240,7 +240,7 @@ dependencies = [
 [[package]]
 name = "arrow-ipc"
 version = "52.0.0"
-source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -254,7 +254,7 @@ dependencies = [
 [[package]]
 name = "arrow-json"
 version = "52.0.0"
-source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -273,7 +273,7 @@ dependencies = [
 [[package]]
 name = "arrow-ord"
 version = "52.0.0"
-source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -287,7 +287,7 @@ dependencies = [
 [[package]]
 name = "arrow-row"
 version = "52.0.0"
-source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
 dependencies = [
  "ahash",
  "arrow-array",
@@ -301,12 +301,12 @@ dependencies = [
 [[package]]
 name = "arrow-schema"
 version = "52.0.0"
-source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
 
 [[package]]
 name = "arrow-select"
 version = "52.0.0"
-source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
 dependencies = [
  "ahash",
  "arrow-array",
@@ -319,7 +319,7 @@ dependencies = [
 [[package]]
 name = "arrow-string"
 version = "52.0.0"
-source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -2704,7 +2704,7 @@ dependencies = [
 [[package]]
 name = "parquet"
 version = "52.0.0"
-source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c"
+source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d"
 dependencies = [
  "ahash",
  "arrow-array",
diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index b488326473..0e7b712d8b 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -66,14 +66,14 @@ rstest = "0.17"
 ## Temporary arrow-rs patch until 52.1.0 is released
 
 [patch.crates-io]
-arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
-parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
+arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
+parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs
index 96bf4216d9..86ac115cca 100644
--- a/datafusion/common/src/scalar/mod.rs
+++ b/datafusion/common/src/scalar/mod.rs
@@ -1570,6 +1570,7 @@ impl ScalarValue {
             DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
             DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
             DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
+            DataType::Utf8View => build_array_string!(StringViewArray, Utf8View),
             DataType::Utf8 => build_array_string!(StringArray, Utf8),
             DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8),
             DataType::Binary => build_array_string!(BinaryArray, Binary),
@@ -1726,7 +1727,6 @@ impl ScalarValue {
             | DataType::Time64(TimeUnit::Millisecond)
             | DataType::Map(_, _)
             | DataType::RunEndEncoded(_, _)
-            | DataType::Utf8View
             | DataType::BinaryView
             | DataType::ListView(_)
             | DataType::LargeListView(_) => {
diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs
index d7cb4b1a3e..d57b5228cb 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -932,6 +932,7 @@ fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
         (LargeUtf8, Utf8) => Some(LargeUtf8),
         (Utf8, LargeUtf8) => Some(LargeUtf8),
         (LargeUtf8, LargeUtf8) => Some(LargeUtf8),
+        (Utf8View, Utf8View) | (Utf8View, Utf8) | (Utf8, Utf8View) => Some(Utf8View),
         _ => None,
     }
 }
diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt
new file mode 100644
index 0000000000..3be3c94770
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/string_view.slt
@@ -0,0 +1,113 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# test StringViewArray with Utf8View columns
+statement ok
+create table test as values (arrow_cast('Andrew', 'Utf8View'), arrow_cast('X', 'Utf8View')),
+                           (arrow_cast('Xiangpeng', 'Utf8View'), arrow_cast('Xiangpeng', 'Utf8View')),
+                           (arrow_cast('Raphael', 'Utf8View'), arrow_cast('R', 'Utf8View')),
+                           (arrow_cast(NULL, 'Utf8View'), arrow_cast('R', 'Utf8View'));
+
+query B
+select arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View');
+----
+false
+
+query B
+select arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
+----
+true
+
+query B
+select arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View');
+----
+true
+
+query B
+select arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
+----
+true
+
+query ??
+select * from test where column1 = column2;
+----
+Xiangpeng Xiangpeng
+
+query ??
+select * from test where column1 <> column2;
+----
+Andrew X
+Raphael R
+
+query ??
+select * from test where column1 = arrow_cast('Andrew', 'Utf8View');
+----
+Andrew X
+
+query ??
+select * from test where column1 = 'Andrew';
+----
+Andrew X
+
+query ??
+select * from test where column1 <> arrow_cast('Andrew', 'Utf8View');
+----
+Xiangpeng Xiangpeng
+Raphael R
+
+query ??
+select * from test where column1 <> 'Andrew';
+----
+Xiangpeng Xiangpeng
+Raphael R
+
+statement ok
+drop table test;
+
+
+# test StringViewArray with Utf8 and Utf8View columns
+statement ok
+create table test as values ('Andrew', arrow_cast('X', 'Utf8View')),
+                            ('Xiangpeng', arrow_cast('Xiangpeng', 'Utf8View')),
+                            ('Raphael', arrow_cast('R', 'Utf8View')),
+                            (NULL, arrow_cast('R', 'Utf8View'));
+
+query T?
+select * from test where column1 = column2;
+----
+Xiangpeng Xiangpeng
+
+query T?
+select * from test where column1 <> column2;
+----
+Andrew X
+Raphael R
+
+query T?
+select * from test where column1 = arrow_cast('Andrew', 'Utf8View');
+----
+Andrew X
+
+query T?
+select * from test where column1 <> arrow_cast('Andrew', 'Utf8View');
+----
+Xiangpeng Xiangpeng
+Raphael R
+
+statement ok
+drop table test;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to