This is an automated email from the ASF dual-hosted git repository.

comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 9b5995fa02 doc-gen: migrate scalar functions (string) documentation 1/4 (#13924)
9b5995fa02 is described below

commit 9b5995fa024d95c19e1270447e13f3c9dd428c69
Author: Ian Lai <[email protected]>
AuthorDate: Tue Dec 31 01:40:43 2024 +0800

    doc-gen: migrate scalar functions (string) documentation 1/4 (#13924)
    
    Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
---
 datafusion/functions/src/string/ascii.rs           | 58 +++++++++-----------
 datafusion/functions/src/string/bit_length.rs      | 47 +++++++---------
 datafusion/functions/src/string/chr.rs             | 46 +++++++---------
 datafusion/functions/src/string/contains.rs        | 46 +++++++---------
 datafusion/functions/src/string/octet_length.rs    | 47 +++++++---------
 datafusion/functions/src/string/rtrim.rs           | 63 ++++++++++------------
 .../functions/src/unicode/character_length.rs      | 48 +++++++----------
 7 files changed, 144 insertions(+), 211 deletions(-)

diff --git a/datafusion/functions/src/string/ascii.rs 
b/datafusion/functions/src/string/ascii.rs
index f366329b4f..858eddc7c8 100644
--- a/datafusion/functions/src/string/ascii.rs
+++ b/datafusion/functions/src/string/ascii.rs
@@ -20,12 +20,33 @@ use arrow::array::{ArrayAccessor, ArrayIter, ArrayRef, 
AsArray, Int32Array};
 use arrow::datatypes::DataType;
 use arrow::error::ArrowError;
 use datafusion_common::{internal_err, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
 use datafusion_expr::{ColumnarValue, Documentation};
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use datafusion_macros::user_doc;
 use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
 
+#[user_doc(
+    doc_section(label = "String Functions"),
+    description = "Returns the Unicode character code of the first character 
in a string.",
+    syntax_example = "ascii(str)",
+    sql_example = r#"```sql
+> select ascii('abc');
++--------------------+
+| ascii(Utf8("abc")) |
++--------------------+
+| 97                 |
++--------------------+
+> select ascii('🚀');
++-------------------+
+| ascii(Utf8("🚀")) |
++-------------------+
+| 128640            |
++-------------------+
+```"#,
+    standard_argument(name = "str", prefix = "String"),
+    related_udf(name = "chr")
+)]
 #[derive(Debug)]
 pub struct AsciiFunc {
     signature: Signature,
@@ -73,41 +94,10 @@ impl ScalarUDFImpl for AsciiFunc {
     }
 
     fn documentation(&self) -> Option<&Documentation> {
-        Some(get_ascii_doc())
+        self.doc()
     }
 }
 
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_ascii_doc() -> &'static Documentation {
-    DOCUMENTATION.get_or_init(|| {
-        Documentation::builder(
-            DOC_SECTION_STRING,
-            "Returns the Unicode character code of the first character in a 
string.",
-            "ascii(str)",
-        )
-        .with_sql_example(
-            r#"```sql
-> select ascii('abc');
-+--------------------+
-| ascii(Utf8("abc")) |
-+--------------------+
-| 97                 |
-+--------------------+
-> select ascii('🚀');
-+-------------------+
-| ascii(Utf8("🚀")) |
-+-------------------+
-| 128640            |
-+-------------------+
-```"#,
-        )
-        .with_standard_argument("str", Some("String"))
-        .with_related_udf("chr")
-        .build()
-    })
-}
-
 fn calculate_ascii<'a, V>(array: V) -> Result<ArrayRef, ArrowError>
 where
     V: ArrayAccessor<Item = &'a str>,
diff --git a/datafusion/functions/src/string/bit_length.rs 
b/datafusion/functions/src/string/bit_length.rs
index 5a23692d85..623fb2ba03 100644
--- a/datafusion/functions/src/string/bit_length.rs
+++ b/datafusion/functions/src/string/bit_length.rs
@@ -18,14 +18,29 @@
 use arrow::compute::kernels::length::bit_length;
 use arrow::datatypes::DataType;
 use std::any::Any;
-use std::sync::OnceLock;
 
 use crate::utils::utf8_to_int_type;
 use datafusion_common::{exec_err, Result, ScalarValue};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
 use datafusion_expr::{ColumnarValue, Documentation, Volatility};
 use datafusion_expr::{ScalarUDFImpl, Signature};
+use datafusion_macros::user_doc;
 
+#[user_doc(
+    doc_section(label = "String Functions"),
+    description = "Returns the bit length of a string.",
+    syntax_example = "bit_length(str)",
+    sql_example = r#"```sql
+> select bit_length('datafusion');
++--------------------------------+
+| bit_length(Utf8("datafusion")) |
++--------------------------------+
+| 80                             |
++--------------------------------+
+```"#,
+    standard_argument(name = "str", prefix = "String"),
+    related_udf(name = "length"),
+    related_udf(name = "octet_length")
+)]
 #[derive(Debug)]
 pub struct BitLengthFunc {
     signature: Signature,
@@ -92,32 +107,6 @@ impl ScalarUDFImpl for BitLengthFunc {
     }
 
     fn documentation(&self) -> Option<&Documentation> {
-        Some(get_bit_length_doc())
+        self.doc()
     }
 }
-
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_bit_length_doc() -> &'static Documentation {
-    DOCUMENTATION.get_or_init(|| {
-        Documentation::builder(
-            DOC_SECTION_STRING,
-            "Returns the bit length of a string.",
-            "bit_length(str)",
-        )
-        .with_sql_example(
-            r#"```sql
-> select bit_length('datafusion');
-+--------------------------------+
-| bit_length(Utf8("datafusion")) |
-+--------------------------------+
-| 80                             |
-+--------------------------------+
-```"#,
-        )
-        .with_standard_argument("str", Some("String"))
-        .with_related_udf("length")
-        .with_related_udf("octet_length")
-        .build()
-    })
-}
diff --git a/datafusion/functions/src/string/chr.rs 
b/datafusion/functions/src/string/chr.rs
index 127b02cdf7..3530e3f22c 100644
--- a/datafusion/functions/src/string/chr.rs
+++ b/datafusion/functions/src/string/chr.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
 
 use arrow::array::ArrayRef;
 use arrow::array::StringArray;
@@ -27,9 +27,9 @@ use arrow::datatypes::DataType::Utf8;
 use crate::utils::make_scalar_function;
 use datafusion_common::cast::as_int64_array;
 use datafusion_common::{exec_err, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
 use datafusion_expr::{ColumnarValue, Documentation, Volatility};
 use datafusion_expr::{ScalarUDFImpl, Signature};
+use datafusion_macros::user_doc;
 
 /// Returns the character with the given code. chr(0) is disallowed because 
text data types cannot store that character.
 /// chr(65) = 'A'
@@ -60,6 +60,21 @@ pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
     Ok(Arc::new(result) as ArrayRef)
 }
 
+#[user_doc(
+    doc_section(label = "String Functions"),
+    description = "Returns the character with the specified ASCII or Unicode 
code value.",
+    syntax_example = "chr(expression)",
+    sql_example = r#"```sql
+> select chr(128640);
++--------------------+
+| chr(Int64(128640)) |
++--------------------+
+| 🚀                 |
++--------------------+ 
+```"#,
+    standard_argument(name = "expression", prefix = "String"),
+    related_udf(name = "ascii")
+)]
 #[derive(Debug)]
 pub struct ChrFunc {
     signature: Signature,
@@ -105,31 +120,6 @@ impl ScalarUDFImpl for ChrFunc {
     }
 
     fn documentation(&self) -> Option<&Documentation> {
-        Some(get_chr_doc())
+        self.doc()
     }
 }
-
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_chr_doc() -> &'static Documentation {
-    DOCUMENTATION.get_or_init(|| {
-        Documentation::builder(
-            DOC_SECTION_STRING,
-            "Returns the character with the specified ASCII or Unicode code 
value.",
-            "chr(expression)",
-        )
-        .with_sql_example(
-            r#"```sql
-> select chr(128640);
-+--------------------+
-| chr(Int64(128640)) |
-+--------------------+
-| 🚀                 |
-+--------------------+ 
-```"#,
-        )
-        .with_standard_argument("expression", Some("String"))
-        .with_related_udf("ascii")
-        .build()
-    })
-}
diff --git a/datafusion/functions/src/string/contains.rs 
b/datafusion/functions/src/string/contains.rs
index 3e5c72ac20..36871f0c32 100644
--- a/datafusion/functions/src/string/contains.rs
+++ b/datafusion/functions/src/string/contains.rs
@@ -23,13 +23,28 @@ use arrow::datatypes::DataType::{Boolean, LargeUtf8, Utf8, 
Utf8View};
 use datafusion_common::exec_err;
 use datafusion_common::DataFusionError;
 use datafusion_common::Result;
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
 use datafusion_expr::{
     ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
 };
+use datafusion_macros::user_doc;
 use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
 
+#[user_doc(
+    doc_section(label = "String Functions"),
+    description = "Return true if search_str is found within string 
(case-sensitive).",
+    syntax_example = "contains(str, search_str)",
+    sql_example = r#"```sql
+> select contains('the quick brown fox', 'row');
++---------------------------------------------------+
+| contains(Utf8("the quick brown fox"),Utf8("row")) |
++---------------------------------------------------+
+| true                                              |
++---------------------------------------------------+
+```"#,
+    standard_argument(name = "str", prefix = "String"),
+    argument(name = "search_str", description = "The string to search for in 
str.")
+)]
 #[derive(Debug)]
 pub struct ContainsFunc {
     signature: Signature,
@@ -75,35 +90,10 @@ impl ScalarUDFImpl for ContainsFunc {
     }
 
     fn documentation(&self) -> Option<&Documentation> {
-        Some(get_contains_doc())
+        self.doc()
     }
 }
 
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_contains_doc() -> &'static Documentation {
-    DOCUMENTATION.get_or_init(|| {
-        Documentation::builder(
-            DOC_SECTION_STRING,
-            "Return true if search_str is found within string 
(case-sensitive).",
-            "contains(str, search_str)",
-        )
-        .with_sql_example(
-            r#"```sql
-> select contains('the quick brown fox', 'row');
-+---------------------------------------------------+
-| contains(Utf8("the quick brown fox"),Utf8("row")) |
-+---------------------------------------------------+
-| true                                              |
-+---------------------------------------------------+
-```"#,
-        )
-        .with_standard_argument("str", Some("String"))
-        .with_argument("search_str", "The string to search for in str.")
-        .build()
-    })
-}
-
 /// use `arrow::compute::contains` to do the calculation for contains
 pub fn contains(args: &[ArrayRef]) -> Result<ArrayRef, DataFusionError> {
     match (args[0].data_type(), args[1].data_type()) {
diff --git a/datafusion/functions/src/string/octet_length.rs 
b/datafusion/functions/src/string/octet_length.rs
index 26355556ff..f443571112 100644
--- a/datafusion/functions/src/string/octet_length.rs
+++ b/datafusion/functions/src/string/octet_length.rs
@@ -18,14 +18,29 @@
 use arrow::compute::kernels::length::length;
 use arrow::datatypes::DataType;
 use std::any::Any;
-use std::sync::OnceLock;
 
 use crate::utils::utf8_to_int_type;
 use datafusion_common::{exec_err, Result, ScalarValue};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
 use datafusion_expr::{ColumnarValue, Documentation, Volatility};
 use datafusion_expr::{ScalarUDFImpl, Signature};
+use datafusion_macros::user_doc;
 
+#[user_doc(
+    doc_section(label = "String Functions"),
+    description = "Returns the length of a string in bytes.",
+    syntax_example = "octet_length(str)",
+    sql_example = r#"```sql
+> select octet_length('Ångström');
++--------------------------------+
+| octet_length(Utf8("Ångström")) |
++--------------------------------+
+| 10                             |
++--------------------------------+
+```"#,
+    standard_argument(name = "str", prefix = "String"),
+    related_udf(name = "bit_length"),
+    related_udf(name = "length")
+)]
 #[derive(Debug)]
 pub struct OctetLengthFunc {
     signature: Signature,
@@ -92,36 +107,10 @@ impl ScalarUDFImpl for OctetLengthFunc {
     }
 
     fn documentation(&self) -> Option<&Documentation> {
-        Some(get_octet_length_doc())
+        self.doc()
     }
 }
 
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_octet_length_doc() -> &'static Documentation {
-    DOCUMENTATION.get_or_init(|| {
-        Documentation::builder(
-            DOC_SECTION_STRING,
-            "Returns the length of a string in bytes.",
-            "octet_length(str)",
-        )
-        .with_sql_example(
-            r#"```sql
-> select octet_length('Ångström');
-+--------------------------------+
-| octet_length(Utf8("Ångström")) |
-+--------------------------------+
-| 10                             |
-+--------------------------------+
-```"#,
-        )
-        .with_standard_argument("str", Some("String"))
-        .with_related_udf("bit_length")
-        .with_related_udf("length")
-        .build()
-    })
-}
-
 #[cfg(test)]
 mod tests {
     use std::sync::Arc;
diff --git a/datafusion/functions/src/string/rtrim.rs 
b/datafusion/functions/src/string/rtrim.rs
index ff8430f153..3fb208bb71 100644
--- a/datafusion/functions/src/string/rtrim.rs
+++ b/datafusion/functions/src/string/rtrim.rs
@@ -18,15 +18,14 @@
 use arrow::array::{ArrayRef, OffsetSizeTrait};
 use arrow::datatypes::DataType;
 use std::any::Any;
-use std::sync::OnceLock;
 
 use crate::string::common::*;
 use crate::utils::{make_scalar_function, utf8_to_str_type};
 use datafusion_common::{exec_err, Result};
 use datafusion_expr::function::Hint;
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
 use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
 use datafusion_expr::{ScalarUDFImpl, Signature};
+use datafusion_macros::user_doc;
 
 /// Returns the longest string  with trailing characters removed. If the 
characters are not specified, whitespace is removed.
 /// rtrim('testxxzx', 'xyz') = 'test'
@@ -35,6 +34,33 @@ fn rtrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> 
Result<ArrayRef> {
     general_trim::<T>(args, TrimType::Right, use_string_view)
 }
 
+#[user_doc(
+    doc_section(label = "String Functions"),
+    description = "Trims the specified trim string from the end of a string. 
If no trim string is provided, all whitespace is removed from the end of the 
input string.",
+    syntax_example = "rtrim(str[, trim_str])",
+    alternative_syntax = "trim(TRAILING trim_str FROM str)",
+    sql_example = r#"```sql
+> select rtrim('  datafusion  ');
++-------------------------------+
+| rtrim(Utf8("  datafusion  ")) |
++-------------------------------+
+|   datafusion                  |
++-------------------------------+
+> select rtrim('___datafusion___', '_');
++-------------------------------------------+
+| rtrim(Utf8("___datafusion___"),Utf8("_")) |
++-------------------------------------------+
+| ___datafusion                             |
++-------------------------------------------+
+```"#,
+    standard_argument(name = "str", prefix = "String"),
+    argument(
+        name = "trim_str",
+        description = "String expression to trim from the end of the input 
string. Can be a constant, column, or function, and any combination of 
arithmetic operators. _Default is whitespace characters._"
+    ),
+    related_udf(name = "btrim"),
+    related_udf(name = "ltrim")
+)]
 #[derive(Debug)]
 pub struct RtrimFunc {
     signature: Signature,
@@ -100,41 +126,10 @@ impl ScalarUDFImpl for RtrimFunc {
     }
 
     fn documentation(&self) -> Option<&Documentation> {
-        Some(get_rtrim_doc())
+        self.doc()
     }
 }
 
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_rtrim_doc() -> &'static Documentation {
-    DOCUMENTATION.get_or_init(|| {
-        Documentation::builder(
-            DOC_SECTION_STRING,
-            "Trims the specified trim string from the end of a string. If no 
trim string is provided, all whitespace is removed from the end of the input 
string.",
-            "rtrim(str[, trim_str])")
-            .with_sql_example(r#"```sql
-> select rtrim('  datafusion  ');
-+-------------------------------+
-| rtrim(Utf8("  datafusion  ")) |
-+-------------------------------+
-|   datafusion                  |
-+-------------------------------+
-> select rtrim('___datafusion___', '_');
-+-------------------------------------------+
-| rtrim(Utf8("___datafusion___"),Utf8("_")) |
-+-------------------------------------------+
-| ___datafusion                             |
-+-------------------------------------------+
-```"#)
-            .with_standard_argument("str", Some("String"))
-            .with_argument("trim_str", "String expression to trim from the end 
of the input string. Can be a constant, column, or function, and any 
combination of arithmetic operators. _Default is whitespace characters._")
-            .with_alternative_syntax("trim(TRAILING trim_str FROM str)")
-            .with_related_udf("btrim")
-            .with_related_udf("ltrim")
-            .build()
-    })
-}
-
 #[cfg(test)]
 mod tests {
     use arrow::array::{Array, StringArray, StringViewArray};
diff --git a/datafusion/functions/src/unicode/character_length.rs 
b/datafusion/functions/src/unicode/character_length.rs
index ad51a8ef72..ee436276fb 100644
--- a/datafusion/functions/src/unicode/character_length.rs
+++ b/datafusion/functions/src/unicode/character_length.rs
@@ -22,13 +22,29 @@ use arrow::array::{
 };
 use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
 use datafusion_common::Result;
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
 use datafusion_expr::{
     ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
 };
+use datafusion_macros::user_doc;
 use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
 
+#[user_doc(
+    doc_section(label = "String Functions"),
+    description = "Returns the number of characters in a string.",
+    syntax_example = "character_length(str)",
+    sql_example = r#"```sql
+> select character_length('Ångström');
++------------------------------------+
+| character_length(Utf8("Ångström")) |
++------------------------------------+
+| 8                                  |
++------------------------------------+
+```"#,
+    standard_argument(name = "str", prefix = "String"),
+    related_udf(name = "bit_length"),
+    related_udf(name = "octet_length")
+)]
 #[derive(Debug)]
 pub struct CharacterLengthFunc {
     signature: Signature,
@@ -85,36 +101,10 @@ impl ScalarUDFImpl for CharacterLengthFunc {
     }
 
     fn documentation(&self) -> Option<&Documentation> {
-        Some(get_character_length_doc())
+        self.doc()
     }
 }
 
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_character_length_doc() -> &'static Documentation {
-    DOCUMENTATION.get_or_init(|| {
-        Documentation::builder(
-            DOC_SECTION_STRING,
-            "Returns the number of characters in a string.",
-            "character_length(str)",
-        )
-        .with_sql_example(
-            r#"```sql
-> select character_length('Ångström');
-+------------------------------------+
-| character_length(Utf8("Ångström")) |
-+------------------------------------+
-| 8                                  |
-+------------------------------------+
-```"#,
-        )
-        .with_standard_argument("str", Some("String"))
-        .with_related_udf("bit_length")
-        .with_related_udf("octet_length")
-        .build()
-    })
-}
-
 /// Returns number of characters in the string.
 /// character_length('josé') = 4
 /// The implementation counts UTF-8 code points to count the number of 
characters


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to