This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new e8ac93a0ff Add native stringview support for LTRIM & RTRIM (#11948)
e8ac93a0ff is described below

commit e8ac93a0ffd490bd549872366bf7fd1b77ffd7f8
Author: kf zheng <[email protected]>
AuthorDate: Wed Aug 14 02:27:03 2024 +0800

    Add native stringview support for LTRIM & RTRIM (#11948)
    
    * add stringview option for ltrim
    
    * add stringview option for rtrim
    
    * add some tests to ensure no casts for ltrim & rtrim when using stringview
    
    * fix typo and remove useless comments
    
    * add tests covering ltrim and rtrim functioning
---
 datafusion/functions/src/string/btrim.rs           |   3 +-
 datafusion/functions/src/string/ltrim.rs           |  20 +++-
 datafusion/functions/src/string/rtrim.rs           |  20 +++-
 datafusion/sqllogictest/test_files/string_view.slt | 128 +++++++++++++++------
 4 files changed, 126 insertions(+), 45 deletions(-)

diff --git a/datafusion/functions/src/string/btrim.rs b/datafusion/functions/src/string/btrim.rs
index 86470dd7a6..371a11c82c 100644
--- a/datafusion/functions/src/string/btrim.rs
+++ b/datafusion/functions/src/string/btrim.rs
@@ -57,7 +57,6 @@ impl BTrimFunc {
                     // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`.
                     // If that fails, it proceeds to `(Utf8, Utf8)`.
                     Exact(vec![Utf8View, Utf8View]),
-                    // Exact(vec![Utf8, Utf8View]),
                     Exact(vec![Utf8, Utf8]),
                     Exact(vec![Utf8View]),
                     Exact(vec![Utf8]),
@@ -98,7 +97,7 @@ impl ScalarUDFImpl for BTrimFunc {
             )(args),
             other => exec_err!(
                 "Unsupported data type {other:?} for function btrim,\
-                expected for Utf8, LargeUtf8 or Utf8View."
+                expected Utf8, LargeUtf8 or Utf8View."
             ),
         }
     }
diff --git a/datafusion/functions/src/string/ltrim.rs b/datafusion/functions/src/string/ltrim.rs
index 6a9fafdd92..b7b27afcee 100644
--- a/datafusion/functions/src/string/ltrim.rs
+++ b/datafusion/functions/src/string/ltrim.rs
@@ -32,7 +32,8 @@ use crate::utils::{make_scalar_function, utf8_to_str_type};
 /// Returns the longest string  with leading characters removed. If the characters are not specified, whitespace is removed.
 /// ltrim('zzzytest', 'xyz') = 'test'
 fn ltrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    general_trim::<T>(args, TrimType::Left, false)
+    let use_string_view = args[0].data_type() == &DataType::Utf8View;
+    general_trim::<T>(args, TrimType::Left, use_string_view)
 }
 
 #[derive(Debug)]
@@ -51,7 +52,15 @@ impl LtrimFunc {
         use DataType::*;
         Self {
             signature: Signature::one_of(
-                vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])],
+                vec![
+                    // Planner attempts coercion to the target type starting with the most preferred candidate.
+                    // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`.
+                    // If that fails, it proceeds to `(Utf8, Utf8)`.
+                    Exact(vec![Utf8View, Utf8View]),
+                    Exact(vec![Utf8, Utf8]),
+                    Exact(vec![Utf8View]),
+                    Exact(vec![Utf8]),
+                ],
                 Volatility::Immutable,
             ),
         }
@@ -77,7 +86,7 @@ impl ScalarUDFImpl for LtrimFunc {
 
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
         match args[0].data_type() {
-            DataType::Utf8 => make_scalar_function(
+            DataType::Utf8 | DataType::Utf8View => make_scalar_function(
                 ltrim::<i32>,
                 vec![Hint::Pad, Hint::AcceptsSingular],
             )(args),
@@ -85,7 +94,10 @@ impl ScalarUDFImpl for LtrimFunc {
                 ltrim::<i64>,
                 vec![Hint::Pad, Hint::AcceptsSingular],
             )(args),
-            other => exec_err!("Unsupported data type {other:?} for function ltrim"),
+            other => exec_err!(
+                "Unsupported data type {other:?} for function ltrim,\
+                expected Utf8, LargeUtf8 or Utf8View."
+            ),
         }
     }
 }
diff --git a/datafusion/functions/src/string/rtrim.rs b/datafusion/functions/src/string/rtrim.rs
index 50b626e3df..ec53f3ed74 100644
--- a/datafusion/functions/src/string/rtrim.rs
+++ b/datafusion/functions/src/string/rtrim.rs
@@ -32,7 +32,8 @@ use crate::utils::{make_scalar_function, utf8_to_str_type};
 /// Returns the longest string  with trailing characters removed. If the characters are not specified, whitespace is removed.
 /// rtrim('testxxzx', 'xyz') = 'test'
 fn rtrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    general_trim::<T>(args, TrimType::Right, false)
+    let use_string_view = args[0].data_type() == &DataType::Utf8View;
+    general_trim::<T>(args, TrimType::Right, use_string_view)
 }
 
 #[derive(Debug)]
@@ -51,7 +52,15 @@ impl RtrimFunc {
         use DataType::*;
         Self {
             signature: Signature::one_of(
-                vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])],
+                vec![
+                    // Planner attempts coercion to the target type starting with the most preferred candidate.
+                    // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`.
+                    // If that fails, it proceeds to `(Utf8, Utf8)`.
+                    Exact(vec![Utf8View, Utf8View]),
+                    Exact(vec![Utf8, Utf8]),
+                    Exact(vec![Utf8View]),
+                    Exact(vec![Utf8]),
+                ],
                 Volatility::Immutable,
             ),
         }
@@ -77,7 +86,7 @@ impl ScalarUDFImpl for RtrimFunc {
 
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
         match args[0].data_type() {
-            DataType::Utf8 => make_scalar_function(
+            DataType::Utf8 | DataType::Utf8View => make_scalar_function(
                 rtrim::<i32>,
                 vec![Hint::Pad, Hint::AcceptsSingular],
             )(args),
@@ -85,7 +94,10 @@ impl ScalarUDFImpl for RtrimFunc {
                 rtrim::<i64>,
                 vec![Hint::Pad, Hint::AcceptsSingular],
             )(args),
-            other => exec_err!("Unsupported data type {other:?} for function rtrim"),
+            other => exec_err!(
+                "Unsupported data type {other:?} for function rtrim,\
+                expected Utf8, LargeUtf8 or Utf8View."
+            ),
         }
     }
 }
diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt
index 0088b035e7..2381bd122b 100644
--- a/datafusion/sqllogictest/test_files/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string_view.slt
@@ -607,6 +607,99 @@ Xiangpeng Xiangpeng Xiangpeng NULL
 Raphael   Raphael   Raphael   NULL
 NULL      NULL      NULL      NULL
 
+## Ensure no casts for LTRIM
+# Test LTRIM with Utf8View input
+query TT
+EXPLAIN SELECT
+  LTRIM(column1_utf8view) AS l
+FROM test;
+----
+logical_plan
+01)Projection: ltrim(test.column1_utf8view) AS l
+02)--TableScan: test projection=[column1_utf8view]
+
+# Test LTRIM with Utf8View input and Utf8View pattern
+query TT
+EXPLAIN SELECT
+  LTRIM(column1_utf8view, 'foo') AS l
+FROM test;
+----
+logical_plan
+01)Projection: ltrim(test.column1_utf8view, Utf8View("foo")) AS l
+02)--TableScan: test projection=[column1_utf8view]
+
+# Test LTRIM with Utf8View bytes longer than 12
+query TT
+EXPLAIN SELECT
+  LTRIM(column1_utf8view, 'this is longer than 12') AS l
+FROM test;
+----
+logical_plan
+01)Projection: ltrim(test.column1_utf8view, Utf8View("this is longer than 12")) AS l
+02)--TableScan: test projection=[column1_utf8view]
+
+# Test LTRIM outputs
+query TTTTT
+SELECT
+  LTRIM(column1_utf8view, 'foo') AS l1,
+  LTRIM(column1_utf8view, column2_utf8view) AS l2,
+  LTRIM(column1_utf8view) AS l3,
+  LTRIM(column1_utf8view, NULL) AS l4,
+  LTRIM(column1_utf8view, 'Xiang') AS l5
+FROM test;
+----
+Andrew    Andrew    Andrew    NULL  Andrew
+Xiangpeng (empty)   Xiangpeng NULL  peng
+Raphael   aphael    Raphael   NULL  Raphael
+NULL      NULL      NULL      NULL  NULL
+
+## ensure no casts for RTRIM
+# Test RTRIM with Utf8View input
+query TT
+EXPLAIN SELECT
+  RTRIM(column1_utf8view) AS l
+FROM test;
+----
+logical_plan
+01)Projection: rtrim(test.column1_utf8view) AS l
+02)--TableScan: test projection=[column1_utf8view]
+
+# Test RTRIM with Utf8View input and Utf8View pattern
+query TT
+EXPLAIN SELECT
+  RTRIM(column1_utf8view, 'foo') AS l
+FROM test;
+----
+logical_plan
+01)Projection: rtrim(test.column1_utf8view, Utf8View("foo")) AS l
+02)--TableScan: test projection=[column1_utf8view]
+
+# Test RTRIM with Utf8View bytes longer than 12
+query TT
+EXPLAIN SELECT
+  RTRIM(column1_utf8view, 'this is longer than 12') AS l
+FROM test;
+----
+logical_plan
+01)Projection: rtrim(test.column1_utf8view, Utf8View("this is longer than 12")) AS l
+02)--TableScan: test projection=[column1_utf8view]
+
+# Test RTRIM outputs
+query TTTTT
+SELECT
+  RTRIM(column1_utf8view, 'foo') AS l1,
+  RTRIM(column1_utf8view, column2_utf8view) AS l2,
+  RTRIM(column1_utf8view) AS l3,
+  RTRIM(column1_utf8view, NULL) AS l4,
+  RTRIM(column1_utf8view, 'peng') As l5
+FROM test;
+----
+Andrew    Andrew    Andrew    NULL  Andrew
+Xiangpeng (empty)   Xiangpeng NULL  Xia
+Raphael   Raphael   Raphael   NULL  Raphael
+NULL      NULL      NULL      NULL  NULL
+
+
 ## Ensure no casts for CHARACTER_LENGTH
 query TT
 EXPLAIN SELECT
@@ -685,16 +778,6 @@ logical_plan
 01)Projection: lower(CAST(test.column1_utf8view AS Utf8)) AS c1
 02)--TableScan: test projection=[column1_utf8view]
 
-## Ensure no casts for LTRIM
-## TODO https://github.com/apache/datafusion/issues/11856
-query TT
-EXPLAIN SELECT
-  LTRIM(column1_utf8view) as c1
-FROM test;
-----
-logical_plan
-01)Projection: ltrim(CAST(test.column1_utf8view AS Utf8)) AS c1
-02)--TableScan: test projection=[column1_utf8view]
 
 ## Ensure no casts for LPAD
 query TT
@@ -811,18 +894,6 @@ logical_plan
 01)Projection: reverse(CAST(test.column1_utf8view AS Utf8)) AS c1
 02)--TableScan: test projection=[column1_utf8view]
 
-## Ensure no casts for RTRIM
-## TODO file ticket
-query TT
-EXPLAIN SELECT
-  RTRIM(column1_utf8view) as c1,
-  RTRIM(column1_utf8view, 'foo') as c2
-FROM test;
-----
-logical_plan
-01)Projection: rtrim(__common_expr_1) AS c1, rtrim(__common_expr_1, Utf8("foo")) AS c2
-02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1
-03)----TableScan: test projection=[column1_utf8view]
 
 ## Ensure no casts for RIGHT
 ## TODO file ticket
@@ -849,19 +920,6 @@ logical_plan
 03)----TableScan: test projection=[column1_utf8view, column2_utf8view]
 
 
-## Ensure no casts for RTRIM
-## TODO file ticket
-query TT
-EXPLAIN SELECT
-  RTRIM(column1_utf8view) as c,
-  RTRIM(column1_utf8view, column2_utf8view) as c1
-FROM test;
-----
-logical_plan
-01)Projection: rtrim(__common_expr_1) AS c, rtrim(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c1
-02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view
-03)----TableScan: test projection=[column1_utf8view, column2_utf8view]
-
 ## Ensure no casts for SPLIT_PART
 ## TODO file ticket
 query TT


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to