This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new fce331a4f7 Migrate Regex Functions from static docs (#12886)
fce331a4f7 is described below

commit fce331a4f7ca908ee7b4500d104fae9ec657f471
Author: Jonathan Chen <[email protected]>
AuthorDate: Mon Oct 14 13:20:21 2024 -0400

    Migrate Regex Functions from static docs (#12886)
    
    * regex migrate
    
    * small fixes
    
    * update docs
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/functions/src/regex/regexpmatch.rs      | 49 ++++++++++-
 datafusion/functions/src/regex/regexpreplace.rs    | 52 +++++++++++-
 docs/source/user-guide/sql/scalar_functions.md     | 97 ----------------------
 docs/source/user-guide/sql/scalar_functions_new.md | 82 ++++++++++++++++++
 4 files changed, 178 insertions(+), 102 deletions(-)

diff --git a/datafusion/functions/src/regex/regexpmatch.rs b/datafusion/functions/src/regex/regexpmatch.rs
index bfec97f92c..443e505332 100644
--- a/datafusion/functions/src/regex/regexpmatch.rs
+++ b/datafusion/functions/src/regex/regexpmatch.rs
@@ -26,10 +26,11 @@ use datafusion_common::{arrow_datafusion_err, plan_err};
 use datafusion_common::{
     cast::as_generic_string_array, internal_err, DataFusionError, Result,
 };
-use datafusion_expr::{ColumnarValue, TypeSignature};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX;
+use datafusion_expr::{ColumnarValue, Documentation, TypeSignature};
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
 use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 #[derive(Debug)]
 pub struct RegexpMatchFunc {
@@ -106,7 +107,51 @@ impl ScalarUDFImpl for RegexpMatchFunc {
             result.map(ColumnarValue::Array)
         }
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_regexp_match_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_regexp_match_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_REGEX)
+            .with_description("Returns a list of [regular expression](https://docs.rs/regex/latest/regex/#syntax) matches in a string.")
+            .with_syntax_example("regexp_match(str, regexp[, flags])")
+            .with_sql_example(r#"```sql
+            > select regexp_match('Köln', '[a-zA-Z]ö[a-zA-Z]{2}');
+            +---------------------------------------------------------+
+            | regexp_match(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) |
+            +---------------------------------------------------------+
+            | [Köln]                                                  |
+            +---------------------------------------------------------+
+            SELECT regexp_match('aBc', '(b|d)', 'i');
+            +---------------------------------------------------+
+            | regexp_match(Utf8("aBc"),Utf8("(b|d)"),Utf8("i")) |
+            +---------------------------------------------------+
+            | [B]                                               |
+            +---------------------------------------------------+
+```
+Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs)
+"#)
+            .with_standard_argument("str", "String")
+            .with_argument("regexp","Regular expression to match against.
+            Can be a constant, column, or function.")
+            .with_argument("flags",
+                           r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported:
+  - **i**: case-insensitive: letters match both upper and lower case
+  - **m**: multi-line mode: ^ and $ match begin/end of line
+  - **s**: allow . to match \n
+  - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used
+  - **U**: swap the meaning of x* and x*?"#)
+            .build()
+            .unwrap()
+    })
 }
+
 fn regexp_match_func(args: &[ArrayRef]) -> Result<ArrayRef> {
     match args[0].data_type() {
         DataType::Utf8 => regexp_match::<i32>(args),
diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs
index bce8752af2..279e5c6ba9 100644
--- a/datafusion/functions/src/regex/regexpreplace.rs
+++ b/datafusion/functions/src/regex/regexpreplace.rs
@@ -32,14 +32,15 @@ use datafusion_common::{
     cast::as_generic_string_array, internal_err, DataFusionError, Result,
 };
 use datafusion_expr::function::Hint;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX;
 use datafusion_expr::ColumnarValue;
 use datafusion_expr::TypeSignature;
-use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::{Documentation, ScalarUDFImpl, Signature, Volatility};
 use regex::Regex;
 use std::any::Any;
 use std::collections::HashMap;
-use std::sync::Arc;
-use std::sync::OnceLock;
+use std::sync::{Arc, OnceLock};
+
 #[derive(Debug)]
 pub struct RegexpReplaceFunc {
     signature: Signature,
@@ -123,6 +124,51 @@ impl ScalarUDFImpl for RegexpReplaceFunc {
             result.map(ColumnarValue::Array)
         }
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_regexp_replace_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_regexp_replace_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+    Documentation::builder()
+        .with_doc_section(DOC_SECTION_REGEX)
+        .with_description("Replaces substrings in a string that match a [regular expression](https://docs.rs/regex/latest/regex/#syntax).")
+        .with_syntax_example("regexp_replace(str, regexp, replacement[, flags])")
+        .with_sql_example(r#"```sql
+> select regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g');
++------------------------------------------------------------------------+
+| regexp_replace(Utf8("foobarbaz"),Utf8("b(..)"),Utf8("X\1Y"),Utf8("g")) |
++------------------------------------------------------------------------+
+| fooXarYXazY                                                            |
++------------------------------------------------------------------------+
+SELECT regexp_replace('aBc', '(b|d)', 'Ab\\1a', 'i');
++-------------------------------------------------------------------+
+| regexp_replace(Utf8("aBc"),Utf8("(b|d)"),Utf8("Ab\1a"),Utf8("i")) |
++-------------------------------------------------------------------+
+| aAbBac                                                            |
++-------------------------------------------------------------------+
+```
+Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs)
+"#)
+        .with_standard_argument("str", "String")
+        .with_argument("regexp","Regular expression to match against.
+        Can be a constant, column, or function.")
+        .with_standard_argument("replacement", "Replacement string")
+        .with_argument("flags",
+                       r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported:
+- **g**: (global) Search globally and don't return after the first match
+- **i**: case-insensitive: letters match both upper and lower case
+- **m**: multi-line mode: ^ and $ match begin/end of line
+- **s**: allow . to match \n
+- **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used
+- **U**: swap the meaning of x* and x*?"#)
+        .build()
+        .unwrap()
+})
 }
 
 fn regexp_replace_func(args: &[ColumnarValue]) -> Result<ArrayRef> {
diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md
index 95762d9585..56145ec803 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -563,103 +563,6 @@ See the new documentation [`here`](https://datafusion.apache.org/user-guide/sql/
 
 See the new documentation [`here`](https://datafusion.apache.org/user-guide/sql/scalar_functions_new.html)
 
-## Regular Expression Functions
-
-Apache DataFusion uses a [PCRE-like] regular expression [syntax]
-(minus support for several features including look-around and backreferences).
-The following regular expression functions are supported:
-
-- [regexp_match](#regexp_match)
-- [regexp_replace](#regexp_replace)
-
-[pcre-like]: https://en.wikibooks.org/wiki/Regular_Expressions/Perl-Compatible_Regular_Expressions
-[syntax]: https://docs.rs/regex/latest/regex/#syntax
-
-### `regexp_match`
-
-Returns a list of [regular expression](https://docs.rs/regex/latest/regex/#syntax) matches in a string.
-
-```
-regexp_match(str, regexp[, flags])
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
-  Can be a constant, column, or function, and any combination of string operators.
-- **regexp**: Regular expression to match against.
-  Can be a constant, column, or function.
-- **flags**: Optional regular expression flags that control the behavior of the
-  regular expression. The following flags are supported:
-  - **i**: case-insensitive: letters match both upper and lower case
-  - **m**: multi-line mode: ^ and $ match begin/end of line
-  - **s**: allow . to match \n
-  - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used
-  - **U**: swap the meaning of x* and x*?
-
-#### Example
-
-```sql
-select regexp_match('Köln', '[a-zA-Z]ö[a-zA-Z]{2}');
-+---------------------------------------------------------+
-| regexp_match(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) |
-+---------------------------------------------------------+
-| [Köln]                                                  |
-+---------------------------------------------------------+
-SELECT regexp_match('aBc', '(b|d)', 'i');
-+---------------------------------------------------+
-| regexp_match(Utf8("aBc"),Utf8("(b|d)"),Utf8("i")) |
-+---------------------------------------------------+
-| [B]                                               |
-+---------------------------------------------------+
-```
-
-Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs)
-
-### `regexp_replace`
-
-Replaces substrings in a string that match a [regular expression](https://docs.rs/regex/latest/regex/#syntax).
-
-```
-regexp_replace(str, regexp, replacement[, flags])
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
-  Can be a constant, column, or function, and any combination of string operators.
-- **regexp**: Regular expression to match against.
-  Can be a constant, column, or function.
-- **replacement**: Replacement string expression.
-  Can be a constant, column, or function, and any combination of string operators.
-- **flags**: Optional regular expression flags that control the behavior of the
-  regular expression. The following flags are supported:
-  - **g**: (global) Search globally and don't return after the first match
-  - **i**: case-insensitive: letters match both upper and lower case
-  - **m**: multi-line mode: ^ and $ match begin/end of line
-  - **s**: allow . to match \n
-  - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used
-  - **U**: swap the meaning of x* and x*?
-
-#### Example
-
-```sql
-SELECT regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g');
-+------------------------------------------------------------------------+
-| regexp_replace(Utf8("foobarbaz"),Utf8("b(..)"),Utf8("X\1Y"),Utf8("g")) |
-+------------------------------------------------------------------------+
-| fooXarYXazY                                                            |
-+------------------------------------------------------------------------+
-SELECT regexp_replace('aBc', '(b|d)', 'Ab\\1a', 'i');
-+-------------------------------------------------------------------+
-| regexp_replace(Utf8("aBc"),Utf8("(b|d)"),Utf8("Ab\1a"),Utf8("i")) |
-+-------------------------------------------------------------------+
-| aAbBac                                                            |
-+-------------------------------------------------------------------+
-```
-
-Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs)
-
 ### `position`
 
 Returns the position of `substr` in `origstr` (counting from 1). If `substr` does
diff --git a/docs/source/user-guide/sql/scalar_functions_new.md b/docs/source/user-guide/sql/scalar_functions_new.md
index 96fbcaa110..7d0261da0a 100644
--- a/docs/source/user-guide/sql/scalar_functions_new.md
+++ b/docs/source/user-guide/sql/scalar_functions_new.md
@@ -1191,6 +1191,8 @@ regular expression [syntax](https://docs.rs/regex/latest/regex/#syntax)
 The following regular expression functions are supported:
 
 - [regexp_like](#regexp_like)
+- [regexp_match](#regexp_match)
+- [regexp_replace](#regexp_replace)
 
 ### `regexp_like`
 
@@ -1230,6 +1232,86 @@ SELECT regexp_like('aBc', '(b|d)', 'i');
 
 Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs)
 
+### `regexp_match`
+
+Returns a list of [regular expression](https://docs.rs/regex/latest/regex/#syntax) matches in a string.
+
+```
+regexp_match(str, regexp[, flags])
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators.
+- **regexp**: Regular expression to match against.
+  Can be a constant, column, or function.
+- **flags**: Optional regular expression flags that control the behavior of the regular expression. The following flags are supported:
+  - **i**: case-insensitive: letters match both upper and lower case
+  - **m**: multi-line mode: ^ and $ match begin/end of line
+  - **s**: allow . to match \n
+  - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used
+  - **U**: swap the meaning of x* and x*?
+
+#### Example
+
+```sql
+            > select regexp_match('Köln', '[a-zA-Z]ö[a-zA-Z]{2}');
+            +---------------------------------------------------------+
+            | regexp_match(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) |
+            +---------------------------------------------------------+
+            | [Köln]                                                  |
+            +---------------------------------------------------------+
+            SELECT regexp_match('aBc', '(b|d)', 'i');
+            +---------------------------------------------------+
+            | regexp_match(Utf8("aBc"),Utf8("(b|d)"),Utf8("i")) |
+            +---------------------------------------------------+
+            | [B]                                               |
+            +---------------------------------------------------+
+```
+
+Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs)
+
+### `regexp_replace`
+
+Replaces substrings in a string that match a [regular expression](https://docs.rs/regex/latest/regex/#syntax).
+
+```
+regexp_replace(str, regexp, replacement[, flags])
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators.
+- **regexp**: Regular expression to match against.
+  Can be a constant, column, or function.
+- **replacement**: Replacement string expression to operate on. Can be a constant, column, or function, and any combination of operators.
+- **flags**: Optional regular expression flags that control the behavior of the regular expression. The following flags are supported:
+- **g**: (global) Search globally and don't return after the first match
+- **i**: case-insensitive: letters match both upper and lower case
+- **m**: multi-line mode: ^ and $ match begin/end of line
+- **s**: allow . to match \n
+- **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used
+- **U**: swap the meaning of x* and x*?
+
+#### Example
+
+```sql
+> select regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g');
++------------------------------------------------------------------------+
+| regexp_replace(Utf8("foobarbaz"),Utf8("b(..)"),Utf8("X\1Y"),Utf8("g")) |
++------------------------------------------------------------------------+
+| fooXarYXazY                                                            |
++------------------------------------------------------------------------+
+SELECT regexp_replace('aBc', '(b|d)', 'Ab\\1a', 'i');
++-------------------------------------------------------------------+
+| regexp_replace(Utf8("aBc"),Utf8("(b|d)"),Utf8("Ab\1a"),Utf8("i")) |
++-------------------------------------------------------------------+
+| aAbBac                                                            |
++-------------------------------------------------------------------+
+```
+
+Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs)
+
 ## Time and Date Functions
 
 - [to_date](#to_date)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to