This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 618c1e8  feat: Support Substring(str [from int] [for int]) (#1621)
618c1e8 is described below

commit 618c1e85f3d1ba1adb97ca7bd3e908efa59bbd99
Author: Dmitry Patsura <[email protected]>
AuthorDate: Mon Jan 24 23:06:38 2022 +0300

    feat: Support Substring(str [from int] [for int]) (#1621)
---
 datafusion/src/physical_plan/functions.rs          |  2 +-
 .../src/physical_plan/unicode_expressions.rs       |  8 ++--
 datafusion/src/sql/planner.rs                      | 48 ++++++++++++++++++++++
 datafusion/tests/sql/expr.rs                       | 10 +++++
 4 files changed, 64 insertions(+), 4 deletions(-)

diff --git a/datafusion/src/physical_plan/functions.rs b/datafusion/src/physical_plan/functions.rs
index ccd355d..2c1946e 100644
--- a/datafusion/src/physical_plan/functions.rs
+++ b/datafusion/src/physical_plan/functions.rs
@@ -3714,7 +3714,7 @@ mod tests {
                 lit(ScalarValue::Int64(Some(-1))),
             ],
             Err(DataFusionError::Execution(
-                "negative substring length not allowed".to_string(),
+                "negative substring length not allowed: substr(<str>, 1, -1)".to_string(),
             )),
             &str,
             Utf8,
diff --git a/datafusion/src/physical_plan/unicode_expressions.rs b/datafusion/src/physical_plan/unicode_expressions.rs
index 3852fd7..5a20d05 100644
--- a/datafusion/src/physical_plan/unicode_expressions.rs
+++ b/datafusion/src/physical_plan/unicode_expressions.rs
@@ -452,9 +452,11 @@ pub fn substr<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
                 .map(|((string, start), count)| match (string, start, count) {
                     (Some(string), Some(start), Some(count)) => {
                         if count < 0 {
-                            Err(DataFusionError::Execution(
-                                "negative substring length not allowed".to_string(),
-                            ))
+                            Err(DataFusionError::Execution(format!(
+                                "negative substring length not allowed: substr(<str>, {}, {})",
+                                start,
+                                count
+                            )))
                         } else if start <= 0 {
                             Ok(Some(string.to_string()))
                         } else {
diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs
index f060044..e951a3a 100644
--- a/datafusion/src/sql/planner.rs
+++ b/datafusion/src/sql/planner.rs
@@ -1542,6 +1542,54 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 ref right,
             } => self.parse_sql_binary_op(left, op, right, schema),
 
+            SQLExpr::Substring {
+                expr,
+                substring_from,
+                substring_for,
+            } => {
+                #[cfg(feature = "unicode_expressions")]
+                {
+                    let arg = self.sql_expr_to_logical_expr(expr, schema)?;
+                    let args = match (substring_from, substring_for) {
+                        (Some(from_expr), Some(for_expr)) => {
+                            let from_logic =
+                                self.sql_expr_to_logical_expr(from_expr, schema)?;
+                            let for_logic =
+                                self.sql_expr_to_logical_expr(for_expr, schema)?;
+                            vec![arg, from_logic, for_logic]
+                        }
+                        (Some(from_expr), None) => {
+                            let from_logic =
+                                self.sql_expr_to_logical_expr(from_expr, schema)?;
+                            vec![arg, from_logic]
+                        }
+                        (None, Some(for_expr)) => {
+                            let from_logic = Expr::Literal(ScalarValue::Int64(Some(1)));
+                            let for_logic =
+                                self.sql_expr_to_logical_expr(for_expr, schema)?;
+                            vec![arg, from_logic, for_logic]
+                        }
+                        _ => {
+                            return Err(DataFusionError::Plan(format!(
+                                "Substring without for/from is not valid {:?}",
+                                sql
+                            )))
+                        }
+                    };
+                    Ok(Expr::ScalarFunction {
+                        fun: functions::BuiltinScalarFunction::Substr,
+                        args,
+                    })
+                }
+
+                #[cfg(not(feature = "unicode_expressions"))]
+                {
+                    Err(DataFusionError::Internal(
+                        "statement substring requires compilation with feature flag: unicode_expressions.".to_string()
+                    ))
+                }
+            }
+
             SQLExpr::Trim { expr, trim_where } => {
                 let (fun, where_expr) = match trim_where {
                     Some((TrimWhereField::Leading, expr)) => {
diff --git a/datafusion/tests/sql/expr.rs b/datafusion/tests/sql/expr.rs
index be330e2..d6ab1a5 100644
--- a/datafusion/tests/sql/expr.rs
+++ b/datafusion/tests/sql/expr.rs
@@ -519,6 +519,16 @@ async fn test_interval_expressions() -> Result<()> {
     Ok(())
 }
 
+#[cfg(feature = "unicode_expressions")]
+#[tokio::test]
+async fn test_substring_expr() -> Result<()> {
+    test_expression!("substring('alphabet' from 2 for 1)", "l");
+    test_expression!("substring('alphabet' from 8)", "t");
+    test_expression!("substring('alphabet' for 1)", "a");
+
+    Ok(())
+}
+
 #[tokio::test]
 async fn test_string_expressions() -> Result<()> {
     test_expression!("ascii('')", "0");

Reply via email to