This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new e8ac93a0ff Add native stringview support for LTRIM & RTRIM (#11948)
e8ac93a0ff is described below
commit e8ac93a0ffd490bd549872366bf7fd1b77ffd7f8
Author: kf zheng <[email protected]>
AuthorDate: Wed Aug 14 02:27:03 2024 +0800
Add native stringview support for LTRIM & RTRIM (#11948)
* add stringview option for ltrim
* add stringview option for rtrim
* add some tests to ensure no casts for ltrim & rtrim when using stringview
* fix typo and remove useless comments
* add tests covering ltrim and rtrim functioning
---
datafusion/functions/src/string/btrim.rs | 3 +-
datafusion/functions/src/string/ltrim.rs | 20 +++-
datafusion/functions/src/string/rtrim.rs | 20 +++-
datafusion/sqllogictest/test_files/string_view.slt | 128 +++++++++++++++------
4 files changed, 126 insertions(+), 45 deletions(-)
diff --git a/datafusion/functions/src/string/btrim.rs b/datafusion/functions/src/string/btrim.rs
index 86470dd7a6..371a11c82c 100644
--- a/datafusion/functions/src/string/btrim.rs
+++ b/datafusion/functions/src/string/btrim.rs
@@ -57,7 +57,6 @@ impl BTrimFunc {
// For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`.
// If that fails, it proceeds to `(Utf8, Utf8)`.
Exact(vec![Utf8View, Utf8View]),
- // Exact(vec![Utf8, Utf8View]),
Exact(vec![Utf8, Utf8]),
Exact(vec![Utf8View]),
Exact(vec![Utf8]),
@@ -98,7 +97,7 @@ impl ScalarUDFImpl for BTrimFunc {
)(args),
other => exec_err!(
"Unsupported data type {other:?} for function btrim,\
- expected for Utf8, LargeUtf8 or Utf8View."
+ expected Utf8, LargeUtf8 or Utf8View."
),
}
}
diff --git a/datafusion/functions/src/string/ltrim.rs b/datafusion/functions/src/string/ltrim.rs
index 6a9fafdd92..b7b27afcee 100644
--- a/datafusion/functions/src/string/ltrim.rs
+++ b/datafusion/functions/src/string/ltrim.rs
@@ -32,7 +32,8 @@ use crate::utils::{make_scalar_function, utf8_to_str_type};
/// Returns the longest string with leading characters removed. If the characters are not specified, whitespace is removed.
/// ltrim('zzzytest', 'xyz') = 'test'
fn ltrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
- general_trim::<T>(args, TrimType::Left, false)
+ let use_string_view = args[0].data_type() == &DataType::Utf8View;
+ general_trim::<T>(args, TrimType::Left, use_string_view)
}
#[derive(Debug)]
@@ -51,7 +52,15 @@ impl LtrimFunc {
use DataType::*;
Self {
signature: Signature::one_of(
- vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])],
+ vec![
+ // Planner attempts coercion to the target type starting with the most preferred candidate.
+ // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`.
+ // If that fails, it proceeds to `(Utf8, Utf8)`.
+ Exact(vec![Utf8View, Utf8View]),
+ Exact(vec![Utf8, Utf8]),
+ Exact(vec![Utf8View]),
+ Exact(vec![Utf8]),
+ ],
Volatility::Immutable,
),
}
@@ -77,7 +86,7 @@ impl ScalarUDFImpl for LtrimFunc {
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
match args[0].data_type() {
- DataType::Utf8 => make_scalar_function(
+ DataType::Utf8 | DataType::Utf8View => make_scalar_function(
ltrim::<i32>,
vec![Hint::Pad, Hint::AcceptsSingular],
)(args),
@@ -85,7 +94,10 @@ impl ScalarUDFImpl for LtrimFunc {
ltrim::<i64>,
vec![Hint::Pad, Hint::AcceptsSingular],
)(args),
- other => exec_err!("Unsupported data type {other:?} for function ltrim"),
+ other => exec_err!(
+ "Unsupported data type {other:?} for function ltrim,\
+ expected Utf8, LargeUtf8 or Utf8View."
+ ),
}
}
}
diff --git a/datafusion/functions/src/string/rtrim.rs b/datafusion/functions/src/string/rtrim.rs
index 50b626e3df..ec53f3ed74 100644
--- a/datafusion/functions/src/string/rtrim.rs
+++ b/datafusion/functions/src/string/rtrim.rs
@@ -32,7 +32,8 @@ use crate::utils::{make_scalar_function, utf8_to_str_type};
/// Returns the longest string with trailing characters removed. If the characters are not specified, whitespace is removed.
/// rtrim('testxxzx', 'xyz') = 'test'
fn rtrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
- general_trim::<T>(args, TrimType::Right, false)
+ let use_string_view = args[0].data_type() == &DataType::Utf8View;
+ general_trim::<T>(args, TrimType::Right, use_string_view)
}
#[derive(Debug)]
@@ -51,7 +52,15 @@ impl RtrimFunc {
use DataType::*;
Self {
signature: Signature::one_of(
- vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])],
+ vec![
+ // Planner attempts coercion to the target type starting with the most preferred candidate.
+ // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`.
+ // If that fails, it proceeds to `(Utf8, Utf8)`.
+ Exact(vec![Utf8View, Utf8View]),
+ Exact(vec![Utf8, Utf8]),
+ Exact(vec![Utf8View]),
+ Exact(vec![Utf8]),
+ ],
Volatility::Immutable,
),
}
@@ -77,7 +86,7 @@ impl ScalarUDFImpl for RtrimFunc {
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
match args[0].data_type() {
- DataType::Utf8 => make_scalar_function(
+ DataType::Utf8 | DataType::Utf8View => make_scalar_function(
rtrim::<i32>,
vec![Hint::Pad, Hint::AcceptsSingular],
)(args),
@@ -85,7 +94,10 @@ impl ScalarUDFImpl for RtrimFunc {
rtrim::<i64>,
vec![Hint::Pad, Hint::AcceptsSingular],
)(args),
- other => exec_err!("Unsupported data type {other:?} for function rtrim"),
+ other => exec_err!(
+ "Unsupported data type {other:?} for function rtrim,\
+ expected Utf8, LargeUtf8 or Utf8View."
+ ),
}
}
}
diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt
index 0088b035e7..2381bd122b 100644
--- a/datafusion/sqllogictest/test_files/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string_view.slt
@@ -607,6 +607,99 @@ Xiangpeng Xiangpeng Xiangpeng NULL
Raphael Raphael Raphael NULL
NULL NULL NULL NULL
+## Ensure no casts for LTRIM
+# Test LTRIM with Utf8View input
+query TT
+EXPLAIN SELECT
+ LTRIM(column1_utf8view) AS l
+FROM test;
+----
+logical_plan
+01)Projection: ltrim(test.column1_utf8view) AS l
+02)--TableScan: test projection=[column1_utf8view]
+
+# Test LTRIM with Utf8View input and Utf8View pattern
+query TT
+EXPLAIN SELECT
+ LTRIM(column1_utf8view, 'foo') AS l
+FROM test;
+----
+logical_plan
+01)Projection: ltrim(test.column1_utf8view, Utf8View("foo")) AS l
+02)--TableScan: test projection=[column1_utf8view]
+
+# Test LTRIM with Utf8View bytes longer than 12
+query TT
+EXPLAIN SELECT
+ LTRIM(column1_utf8view, 'this is longer than 12') AS l
+FROM test;
+----
+logical_plan
+01)Projection: ltrim(test.column1_utf8view, Utf8View("this is longer than 12")) AS l
+02)--TableScan: test projection=[column1_utf8view]
+
+# Test LTRIM outputs
+query TTTTT
+SELECT
+ LTRIM(column1_utf8view, 'foo') AS l1,
+ LTRIM(column1_utf8view, column2_utf8view) AS l2,
+ LTRIM(column1_utf8view) AS l3,
+ LTRIM(column1_utf8view, NULL) AS l4,
+ LTRIM(column1_utf8view, 'Xiang') AS l5
+FROM test;
+----
+Andrew Andrew Andrew NULL Andrew
+Xiangpeng (empty) Xiangpeng NULL peng
+Raphael aphael Raphael NULL Raphael
+NULL NULL NULL NULL NULL
+
+## ensure no casts for RTRIM
+# Test RTRIM with Utf8View input
+query TT
+EXPLAIN SELECT
+ RTRIM(column1_utf8view) AS l
+FROM test;
+----
+logical_plan
+01)Projection: rtrim(test.column1_utf8view) AS l
+02)--TableScan: test projection=[column1_utf8view]
+
+# Test RTRIM with Utf8View input and Utf8View pattern
+query TT
+EXPLAIN SELECT
+ RTRIM(column1_utf8view, 'foo') AS l
+FROM test;
+----
+logical_plan
+01)Projection: rtrim(test.column1_utf8view, Utf8View("foo")) AS l
+02)--TableScan: test projection=[column1_utf8view]
+
+# Test RTRIM with Utf8View bytes longer than 12
+query TT
+EXPLAIN SELECT
+ RTRIM(column1_utf8view, 'this is longer than 12') AS l
+FROM test;
+----
+logical_plan
+01)Projection: rtrim(test.column1_utf8view, Utf8View("this is longer than 12")) AS l
+02)--TableScan: test projection=[column1_utf8view]
+
+# Test RTRIM outputs
+query TTTTT
+SELECT
+ RTRIM(column1_utf8view, 'foo') AS l1,
+ RTRIM(column1_utf8view, column2_utf8view) AS l2,
+ RTRIM(column1_utf8view) AS l3,
+ RTRIM(column1_utf8view, NULL) AS l4,
+ RTRIM(column1_utf8view, 'peng') As l5
+FROM test;
+----
+Andrew Andrew Andrew NULL Andrew
+Xiangpeng (empty) Xiangpeng NULL Xia
+Raphael Raphael Raphael NULL Raphael
+NULL NULL NULL NULL NULL
+
+
## Ensure no casts for CHARACTER_LENGTH
query TT
EXPLAIN SELECT
@@ -685,16 +778,6 @@ logical_plan
01)Projection: lower(CAST(test.column1_utf8view AS Utf8)) AS c1
02)--TableScan: test projection=[column1_utf8view]
-## Ensure no casts for LTRIM
-## TODO https://github.com/apache/datafusion/issues/11856
-query TT
-EXPLAIN SELECT
- LTRIM(column1_utf8view) as c1
-FROM test;
-----
-logical_plan
-01)Projection: ltrim(CAST(test.column1_utf8view AS Utf8)) AS c1
-02)--TableScan: test projection=[column1_utf8view]
## Ensure no casts for LPAD
query TT
@@ -811,18 +894,6 @@ logical_plan
01)Projection: reverse(CAST(test.column1_utf8view AS Utf8)) AS c1
02)--TableScan: test projection=[column1_utf8view]
-## Ensure no casts for RTRIM
-## TODO file ticket
-query TT
-EXPLAIN SELECT
- RTRIM(column1_utf8view) as c1,
- RTRIM(column1_utf8view, 'foo') as c2
-FROM test;
-----
-logical_plan
-01)Projection: rtrim(__common_expr_1) AS c1, rtrim(__common_expr_1, Utf8("foo")) AS c2
-02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1
-03)----TableScan: test projection=[column1_utf8view]
## Ensure no casts for RIGHT
## TODO file ticket
@@ -849,19 +920,6 @@ logical_plan
03)----TableScan: test projection=[column1_utf8view, column2_utf8view]
-## Ensure no casts for RTRIM
-## TODO file ticket
-query TT
-EXPLAIN SELECT
- RTRIM(column1_utf8view) as c,
- RTRIM(column1_utf8view, column2_utf8view) as c1
-FROM test;
-----
-logical_plan
-01)Projection: rtrim(__common_expr_1) AS c, rtrim(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c1
-02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view
-03)----TableScan: test projection=[column1_utf8view, column2_utf8view]
-
## Ensure no casts for SPLIT_PART
## TODO file ticket
query TT
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]