This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 618c1e8 feat: Support Substring(str [from int] [for int]) (#1621)
618c1e8 is described below
commit 618c1e85f3d1ba1adb97ca7bd3e908efa59bbd99
Author: Dmitry Patsura <[email protected]>
AuthorDate: Mon Jan 24 23:06:38 2022 +0300
feat: Support Substring(str [from int] [for int]) (#1621)
---
datafusion/src/physical_plan/functions.rs | 2 +-
.../src/physical_plan/unicode_expressions.rs | 8 ++--
datafusion/src/sql/planner.rs | 48 ++++++++++++++++++++++
datafusion/tests/sql/expr.rs | 10 +++++
4 files changed, 64 insertions(+), 4 deletions(-)
diff --git a/datafusion/src/physical_plan/functions.rs b/datafusion/src/physical_plan/functions.rs
index ccd355d..2c1946e 100644
--- a/datafusion/src/physical_plan/functions.rs
+++ b/datafusion/src/physical_plan/functions.rs
@@ -3714,7 +3714,7 @@ mod tests {
lit(ScalarValue::Int64(Some(-1))),
],
Err(DataFusionError::Execution(
- "negative substring length not allowed".to_string(),
+ "negative substring length not allowed: substr(<str>, 1, -1)".to_string(),
)),
&str,
Utf8,
diff --git a/datafusion/src/physical_plan/unicode_expressions.rs b/datafusion/src/physical_plan/unicode_expressions.rs
index 3852fd7..5a20d05 100644
--- a/datafusion/src/physical_plan/unicode_expressions.rs
+++ b/datafusion/src/physical_plan/unicode_expressions.rs
@@ -452,9 +452,11 @@ pub fn substr<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
.map(|((string, start), count)| match (string, start, count) {
(Some(string), Some(start), Some(count)) => {
if count < 0 {
- Err(DataFusionError::Execution(
- "negative substring length not allowed".to_string(),
- ))
+ Err(DataFusionError::Execution(format!(
+ "negative substring length not allowed: substr(<str>, {}, {})",
+ start,
+ count
+ )))
} else if start <= 0 {
Ok(Some(string.to_string()))
} else {
diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs
index f060044..e951a3a 100644
--- a/datafusion/src/sql/planner.rs
+++ b/datafusion/src/sql/planner.rs
@@ -1542,6 +1542,54 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
ref right,
} => self.parse_sql_binary_op(left, op, right, schema),
+ SQLExpr::Substring {
+ expr,
+ substring_from,
+ substring_for,
+ } => {
+ #[cfg(feature = "unicode_expressions")]
+ {
+ let arg = self.sql_expr_to_logical_expr(expr, schema)?;
+ let args = match (substring_from, substring_for) {
+ (Some(from_expr), Some(for_expr)) => {
+ let from_logic =
+ self.sql_expr_to_logical_expr(from_expr, schema)?;
+ let for_logic =
+ self.sql_expr_to_logical_expr(for_expr, schema)?;
+ vec![arg, from_logic, for_logic]
+ }
+ (Some(from_expr), None) => {
+ let from_logic =
+ self.sql_expr_to_logical_expr(from_expr, schema)?;
+ vec![arg, from_logic]
+ }
+ (None, Some(for_expr)) => {
+ let from_logic = Expr::Literal(ScalarValue::Int64(Some(1)));
+ let for_logic =
+ self.sql_expr_to_logical_expr(for_expr, schema)?;
+ vec![arg, from_logic, for_logic]
+ }
+ _ => {
+ return Err(DataFusionError::Plan(format!(
+ "Substring without for/from is not valid {:?}",
+ sql
+ )))
+ }
+ };
+ Ok(Expr::ScalarFunction {
+ fun: functions::BuiltinScalarFunction::Substr,
+ args,
+ })
+ }
+
+ #[cfg(not(feature = "unicode_expressions"))]
+ {
+ Err(DataFusionError::Internal(
+ "statement substring requires compilation with feature flag: unicode_expressions.".to_string()
+ ))
+ }
+ }
+
SQLExpr::Trim { expr, trim_where } => {
let (fun, where_expr) = match trim_where {
Some((TrimWhereField::Leading, expr)) => {
diff --git a/datafusion/tests/sql/expr.rs b/datafusion/tests/sql/expr.rs
index be330e2..d6ab1a5 100644
--- a/datafusion/tests/sql/expr.rs
+++ b/datafusion/tests/sql/expr.rs
@@ -519,6 +519,16 @@ async fn test_interval_expressions() -> Result<()> {
Ok(())
}
+#[cfg(feature = "unicode_expressions")]
+#[tokio::test]
+async fn test_substring_expr() -> Result<()> {
+ test_expression!("substring('alphabet' from 2 for 1)", "l");
+ test_expression!("substring('alphabet' from 8)", "t");
+ test_expression!("substring('alphabet' for 1)", "a");
+
+ Ok(())
+}
+
#[tokio::test]
async fn test_string_expressions() -> Result<()> {
test_expression!("ascii('')", "0");