This is an automated email from the ASF dual-hosted git repository.
comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 9b5995fa02 doc-gen: migrate scalar functions (string) documentation 1/4 (#13924)
9b5995fa02 is described below
commit 9b5995fa024d95c19e1270447e13f3c9dd428c69
Author: Ian Lai <[email protected]>
AuthorDate: Tue Dec 31 01:40:43 2024 +0800
doc-gen: migrate scalar functions (string) documentation 1/4 (#13924)
Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
---
datafusion/functions/src/string/ascii.rs | 58 +++++++++-----------
datafusion/functions/src/string/bit_length.rs | 47 +++++++---------
datafusion/functions/src/string/chr.rs | 46 +++++++---------
datafusion/functions/src/string/contains.rs | 46 +++++++---------
datafusion/functions/src/string/octet_length.rs | 47 +++++++---------
datafusion/functions/src/string/rtrim.rs | 63 ++++++++++------------
.../functions/src/unicode/character_length.rs | 48 +++++++----------
7 files changed, 144 insertions(+), 211 deletions(-)
diff --git a/datafusion/functions/src/string/ascii.rs
b/datafusion/functions/src/string/ascii.rs
index f366329b4f..858eddc7c8 100644
--- a/datafusion/functions/src/string/ascii.rs
+++ b/datafusion/functions/src/string/ascii.rs
@@ -20,12 +20,33 @@ use arrow::array::{ArrayAccessor, ArrayIter, ArrayRef,
AsArray, Int32Array};
use arrow::datatypes::DataType;
use arrow::error::ArrowError;
use datafusion_common::{internal_err, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use datafusion_macros::user_doc;
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
+#[user_doc(
+ doc_section(label = "String Functions"),
+ description = "Returns the Unicode character code of the first character
in a string.",
+ syntax_example = "ascii(str)",
+ sql_example = r#"```sql
+> select ascii('abc');
++--------------------+
+| ascii(Utf8("abc")) |
++--------------------+
+| 97 |
++--------------------+
+> select ascii('🚀');
++-------------------+
+| ascii(Utf8("🚀")) |
++-------------------+
+| 128640 |
++-------------------+
+```"#,
+ standard_argument(name = "str", prefix = "String"),
+ related_udf(name = "chr")
+)]
#[derive(Debug)]
pub struct AsciiFunc {
signature: Signature,
@@ -73,41 +94,10 @@ impl ScalarUDFImpl for AsciiFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_ascii_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_ascii_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_STRING,
- "Returns the Unicode character code of the first character in a
string.",
- "ascii(str)",
- )
- .with_sql_example(
- r#"```sql
-> select ascii('abc');
-+--------------------+
-| ascii(Utf8("abc")) |
-+--------------------+
-| 97 |
-+--------------------+
-> select ascii('🚀');
-+-------------------+
-| ascii(Utf8("🚀")) |
-+-------------------+
-| 128640 |
-+-------------------+
-```"#,
- )
- .with_standard_argument("str", Some("String"))
- .with_related_udf("chr")
- .build()
- })
-}
-
fn calculate_ascii<'a, V>(array: V) -> Result<ArrayRef, ArrowError>
where
V: ArrayAccessor<Item = &'a str>,
diff --git a/datafusion/functions/src/string/bit_length.rs
b/datafusion/functions/src/string/bit_length.rs
index 5a23692d85..623fb2ba03 100644
--- a/datafusion/functions/src/string/bit_length.rs
+++ b/datafusion/functions/src/string/bit_length.rs
@@ -18,14 +18,29 @@
use arrow::compute::kernels::length::bit_length;
use arrow::datatypes::DataType;
use std::any::Any;
-use std::sync::OnceLock;
use crate::utils::utf8_to_int_type;
use datafusion_common::{exec_err, Result, ScalarValue};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
+use datafusion_macros::user_doc;
+#[user_doc(
+ doc_section(label = "String Functions"),
+ description = "Returns the bit length of a string.",
+ syntax_example = "bit_length(str)",
+ sql_example = r#"```sql
+> select bit_length('datafusion');
++--------------------------------+
+| bit_length(Utf8("datafusion")) |
++--------------------------------+
+| 80 |
++--------------------------------+
+```"#,
+ standard_argument(name = "str", prefix = "String"),
+ related_udf(name = "length"),
+ related_udf(name = "octet_length")
+)]
#[derive(Debug)]
pub struct BitLengthFunc {
signature: Signature,
@@ -92,32 +107,6 @@ impl ScalarUDFImpl for BitLengthFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_bit_length_doc())
+ self.doc()
}
}
-
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_bit_length_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_STRING,
- "Returns the bit length of a string.",
- "bit_length(str)",
- )
- .with_sql_example(
- r#"```sql
-> select bit_length('datafusion');
-+--------------------------------+
-| bit_length(Utf8("datafusion")) |
-+--------------------------------+
-| 80 |
-+--------------------------------+
-```"#,
- )
- .with_standard_argument("str", Some("String"))
- .with_related_udf("length")
- .with_related_udf("octet_length")
- .build()
- })
-}
diff --git a/datafusion/functions/src/string/chr.rs
b/datafusion/functions/src/string/chr.rs
index 127b02cdf7..3530e3f22c 100644
--- a/datafusion/functions/src/string/chr.rs
+++ b/datafusion/functions/src/string/chr.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
use arrow::array::ArrayRef;
use arrow::array::StringArray;
@@ -27,9 +27,9 @@ use arrow::datatypes::DataType::Utf8;
use crate::utils::make_scalar_function;
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
+use datafusion_macros::user_doc;
/// Returns the character with the given code. chr(0) is disallowed because
text data types cannot store that character.
/// chr(65) = 'A'
@@ -60,6 +60,21 @@ pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
Ok(Arc::new(result) as ArrayRef)
}
+#[user_doc(
+ doc_section(label = "String Functions"),
+ description = "Returns the character with the specified ASCII or Unicode
code value.",
+ syntax_example = "chr(expression)",
+ sql_example = r#"```sql
+> select chr(128640);
++--------------------+
+| chr(Int64(128640)) |
++--------------------+
+| 🚀 |
++--------------------+
+```"#,
+ standard_argument(name = "expression", prefix = "String"),
+ related_udf(name = "ascii")
+)]
#[derive(Debug)]
pub struct ChrFunc {
signature: Signature,
@@ -105,31 +120,6 @@ impl ScalarUDFImpl for ChrFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_chr_doc())
+ self.doc()
}
}
-
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_chr_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_STRING,
- "Returns the character with the specified ASCII or Unicode code
value.",
- "chr(expression)",
- )
- .with_sql_example(
- r#"```sql
-> select chr(128640);
-+--------------------+
-| chr(Int64(128640)) |
-+--------------------+
-| 🚀 |
-+--------------------+
-```"#,
- )
- .with_standard_argument("expression", Some("String"))
- .with_related_udf("ascii")
- .build()
- })
-}
diff --git a/datafusion/functions/src/string/contains.rs
b/datafusion/functions/src/string/contains.rs
index 3e5c72ac20..36871f0c32 100644
--- a/datafusion/functions/src/string/contains.rs
+++ b/datafusion/functions/src/string/contains.rs
@@ -23,13 +23,28 @@ use arrow::datatypes::DataType::{Boolean, LargeUtf8, Utf8,
Utf8View};
use datafusion_common::exec_err;
use datafusion_common::DataFusionError;
use datafusion_common::Result;
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
+use datafusion_macros::user_doc;
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
+#[user_doc(
+ doc_section(label = "String Functions"),
+ description = "Return true if search_str is found within string
(case-sensitive).",
+ syntax_example = "contains(str, search_str)",
+ sql_example = r#"```sql
+> select contains('the quick brown fox', 'row');
++---------------------------------------------------+
+| contains(Utf8("the quick brown fox"),Utf8("row")) |
++---------------------------------------------------+
+| true |
++---------------------------------------------------+
+```"#,
+ standard_argument(name = "str", prefix = "String"),
+ argument(name = "search_str", description = "The string to search for in
str.")
+)]
#[derive(Debug)]
pub struct ContainsFunc {
signature: Signature,
@@ -75,35 +90,10 @@ impl ScalarUDFImpl for ContainsFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_contains_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_contains_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_STRING,
- "Return true if search_str is found within string
(case-sensitive).",
- "contains(str, search_str)",
- )
- .with_sql_example(
- r#"```sql
-> select contains('the quick brown fox', 'row');
-+---------------------------------------------------+
-| contains(Utf8("the quick brown fox"),Utf8("row")) |
-+---------------------------------------------------+
-| true |
-+---------------------------------------------------+
-```"#,
- )
- .with_standard_argument("str", Some("String"))
- .with_argument("search_str", "The string to search for in str.")
- .build()
- })
-}
-
/// use `arrow::compute::contains` to do the calculation for contains
pub fn contains(args: &[ArrayRef]) -> Result<ArrayRef, DataFusionError> {
match (args[0].data_type(), args[1].data_type()) {
diff --git a/datafusion/functions/src/string/octet_length.rs
b/datafusion/functions/src/string/octet_length.rs
index 26355556ff..f443571112 100644
--- a/datafusion/functions/src/string/octet_length.rs
+++ b/datafusion/functions/src/string/octet_length.rs
@@ -18,14 +18,29 @@
use arrow::compute::kernels::length::length;
use arrow::datatypes::DataType;
use std::any::Any;
-use std::sync::OnceLock;
use crate::utils::utf8_to_int_type;
use datafusion_common::{exec_err, Result, ScalarValue};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
+use datafusion_macros::user_doc;
+#[user_doc(
+ doc_section(label = "String Functions"),
+ description = "Returns the length of a string in bytes.",
+ syntax_example = "octet_length(str)",
+ sql_example = r#"```sql
+> select octet_length('Ångström');
++--------------------------------+
+| octet_length(Utf8("Ångström")) |
++--------------------------------+
+| 10 |
++--------------------------------+
+```"#,
+ standard_argument(name = "str", prefix = "String"),
+ related_udf(name = "bit_length"),
+ related_udf(name = "length")
+)]
#[derive(Debug)]
pub struct OctetLengthFunc {
signature: Signature,
@@ -92,36 +107,10 @@ impl ScalarUDFImpl for OctetLengthFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_octet_length_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_octet_length_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_STRING,
- "Returns the length of a string in bytes.",
- "octet_length(str)",
- )
- .with_sql_example(
- r#"```sql
-> select octet_length('Ångström');
-+--------------------------------+
-| octet_length(Utf8("Ångström")) |
-+--------------------------------+
-| 10 |
-+--------------------------------+
-```"#,
- )
- .with_standard_argument("str", Some("String"))
- .with_related_udf("bit_length")
- .with_related_udf("length")
- .build()
- })
-}
-
#[cfg(test)]
mod tests {
use std::sync::Arc;
diff --git a/datafusion/functions/src/string/rtrim.rs
b/datafusion/functions/src/string/rtrim.rs
index ff8430f153..3fb208bb71 100644
--- a/datafusion/functions/src/string/rtrim.rs
+++ b/datafusion/functions/src/string/rtrim.rs
@@ -18,15 +18,14 @@
use arrow::array::{ArrayRef, OffsetSizeTrait};
use arrow::datatypes::DataType;
use std::any::Any;
-use std::sync::OnceLock;
use crate::string::common::*;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::{exec_err, Result};
use datafusion_expr::function::Hint;
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
+use datafusion_macros::user_doc;
/// Returns the longest string with trailing characters removed. If the
characters are not specified, whitespace is removed.
/// rtrim('testxxzx', 'xyz') = 'test'
@@ -35,6 +34,33 @@ fn rtrim<T: OffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
general_trim::<T>(args, TrimType::Right, use_string_view)
}
+#[user_doc(
+ doc_section(label = "String Functions"),
+ description = "Trims the specified trim string from the end of a string.
If no trim string is provided, all whitespace is removed from the end of the
input string.",
+ syntax_example = "rtrim(str[, trim_str])",
+ alternative_syntax = "trim(TRAILING trim_str FROM str)",
+ sql_example = r#"```sql
+> select rtrim(' datafusion ');
++-------------------------------+
+| rtrim(Utf8(" datafusion ")) |
++-------------------------------+
+| datafusion |
++-------------------------------+
+> select rtrim('___datafusion___', '_');
++-------------------------------------------+
+| rtrim(Utf8("___datafusion___"),Utf8("_")) |
++-------------------------------------------+
+| ___datafusion |
++-------------------------------------------+
+```"#,
+ standard_argument(name = "str", prefix = "String"),
+ argument(
+ name = "trim_str",
+ description = "String expression to trim from the end of the input
string. Can be a constant, column, or function, and any combination of
arithmetic operators. _Default is whitespace characters._"
+ ),
+ related_udf(name = "btrim"),
+ related_udf(name = "ltrim")
+)]
#[derive(Debug)]
pub struct RtrimFunc {
signature: Signature,
@@ -100,41 +126,10 @@ impl ScalarUDFImpl for RtrimFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_rtrim_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_rtrim_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_STRING,
- "Trims the specified trim string from the end of a string. If no
trim string is provided, all whitespace is removed from the end of the input
string.",
- "rtrim(str[, trim_str])")
- .with_sql_example(r#"```sql
-> select rtrim(' datafusion ');
-+-------------------------------+
-| rtrim(Utf8(" datafusion ")) |
-+-------------------------------+
-| datafusion |
-+-------------------------------+
-> select rtrim('___datafusion___', '_');
-+-------------------------------------------+
-| rtrim(Utf8("___datafusion___"),Utf8("_")) |
-+-------------------------------------------+
-| ___datafusion |
-+-------------------------------------------+
-```"#)
- .with_standard_argument("str", Some("String"))
- .with_argument("trim_str", "String expression to trim from the end
of the input string. Can be a constant, column, or function, and any
combination of arithmetic operators. _Default is whitespace characters._")
- .with_alternative_syntax("trim(TRAILING trim_str FROM str)")
- .with_related_udf("btrim")
- .with_related_udf("ltrim")
- .build()
- })
-}
-
#[cfg(test)]
mod tests {
use arrow::array::{Array, StringArray, StringViewArray};
diff --git a/datafusion/functions/src/unicode/character_length.rs
b/datafusion/functions/src/unicode/character_length.rs
index ad51a8ef72..ee436276fb 100644
--- a/datafusion/functions/src/unicode/character_length.rs
+++ b/datafusion/functions/src/unicode/character_length.rs
@@ -22,13 +22,29 @@ use arrow::array::{
};
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
use datafusion_common::Result;
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
+use datafusion_macros::user_doc;
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
+#[user_doc(
+ doc_section(label = "String Functions"),
+ description = "Returns the number of characters in a string.",
+ syntax_example = "character_length(str)",
+ sql_example = r#"```sql
+> select character_length('Ångström');
++------------------------------------+
+| character_length(Utf8("Ångström")) |
++------------------------------------+
+| 8 |
++------------------------------------+
+```"#,
+ standard_argument(name = "str", prefix = "String"),
+ related_udf(name = "bit_length"),
+ related_udf(name = "octet_length")
+)]
#[derive(Debug)]
pub struct CharacterLengthFunc {
signature: Signature,
@@ -85,36 +101,10 @@ impl ScalarUDFImpl for CharacterLengthFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_character_length_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_character_length_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_STRING,
- "Returns the number of characters in a string.",
- "character_length(str)",
- )
- .with_sql_example(
- r#"```sql
-> select character_length('Ångström');
-+------------------------------------+
-| character_length(Utf8("Ångström")) |
-+------------------------------------+
-| 8 |
-+------------------------------------+
-```"#,
- )
- .with_standard_argument("str", Some("String"))
- .with_related_udf("bit_length")
- .with_related_udf("octet_length")
- .build()
- })
-}
-
/// Returns number of characters in the string.
/// character_length('josé') = 4
/// The implementation counts UTF-8 code points to count the number of
characters
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]