This is an automated email from the ASF dual-hosted git repository.
jonah pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 618767e8be doc-gen: migrate scalar functions (string) documentation 3/4 (#13926)
618767e8be is described below
commit 618767e8befd3c5bf39d07efb48d66365a5f4537
Author: Ian Lai <[email protected]>
AuthorDate: Sat Dec 28 22:16:03 2024 +0800
doc-gen: migrate scalar functions (string) documentation 3/4 (#13926)
Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
---
datafusion/functions/src/string/repeat.rs | 49 +++++++++++--------------
datafusion/functions/src/string/replace.rs | 49 +++++++++++--------------
datafusion/functions/src/string/split_part.rs | 45 ++++++++++-------------
datafusion/functions/src/string/starts_with.rs | 46 +++++++++--------------
datafusion/functions/src/string/to_hex.rs | 45 +++++++++--------------
datafusion/functions/src/string/uuid.rs | 39 ++++++++------------
datafusion/functions/src/unicode/find_in_set.rs | 46 +++++++++++------------
datafusion/functions/src/unicode/reverse.rs | 44 +++++++++-------------
8 files changed, 152 insertions(+), 211 deletions(-)
diff --git a/datafusion/functions/src/string/repeat.rs b/datafusion/functions/src/string/repeat.rs
index 044b354924..d5ebf902c1 100644
--- a/datafusion/functions/src/string/repeat.rs
+++ b/datafusion/functions/src/string/repeat.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_str_type};
@@ -29,11 +29,29 @@ use arrow::datatypes::DataType::{LargeUtf8, Utf8, Utf8View};
use datafusion_common::cast::as_int64_array;
use datafusion_common::types::{logical_int64, logical_string};
use datafusion_common::{exec_err, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_expr_common::signature::TypeSignatureClass;
+use datafusion_macros::user_doc;
+#[user_doc(
+ doc_section(label = "String Functions"),
+ description = "Returns a string with an input string repeated a specified
number.",
+ syntax_example = "repeat(str, n)",
+ sql_example = r#"```sql
+> select repeat('data', 3);
++-------------------------------+
+| repeat(Utf8("data"),Int64(3)) |
++-------------------------------+
+| datadatadata |
++-------------------------------+
+```"#,
+ standard_argument(name = "str", prefix = "String"),
+ argument(
+ name = "n",
+ description = "Number of times to repeat the input string."
+ )
+)]
#[derive(Debug)]
pub struct RepeatFunc {
signature: Signature,
@@ -85,35 +103,10 @@ impl ScalarUDFImpl for RepeatFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_repeat_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_repeat_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_STRING,
- "Returns a string with an input string repeated a specified
number.",
- "repeat(str, n)",
- )
- .with_sql_example(
- r#"```sql
-> select repeat('data', 3);
-+-------------------------------+
-| repeat(Utf8("data"),Int64(3)) |
-+-------------------------------+
-| datadatadata |
-+-------------------------------+
-```"#,
- )
- .with_standard_argument("str", Some("String"))
- .with_argument("n", "Number of times to repeat the input string.")
- .build()
- })
-}
-
/// Repeats string the specified number of times.
/// repeat('Pg', 4) = 'PgPgPgPg'
fn repeat(args: &[ArrayRef]) -> Result<ArrayRef> {
diff --git a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs
index 9b71d3871e..9b6afc5469 100644
--- a/datafusion/functions/src/string/replace.rs
+++ b/datafusion/functions/src/string/replace.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait, StringArray};
use arrow::datatypes::DataType;
@@ -24,10 +24,28 @@ use arrow::datatypes::DataType;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
use datafusion_common::{exec_err, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
-
+use datafusion_macros::user_doc;
+#[user_doc(
+ doc_section(label = "String Functions"),
+ description = "Replaces all occurrences of a specified substring in a
string with a new substring.",
+ syntax_example = "replace(str, substr, replacement)",
+ sql_example = r#"```sql
+> select replace('ABabbaBA', 'ab', 'cd');
++-------------------------------------------------+
+| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) |
++-------------------------------------------------+
+| ABcdbaBA |
++-------------------------------------------------+
+```"#,
+ standard_argument(name = "str", prefix = "String"),
+ standard_argument(
+ name = "substr",
+ prefix = "Substring expression to replace in the input string.
Substring"
+ ),
+ standard_argument(name = "replacement", prefix = "Replacement substring")
+)]
#[derive(Debug)]
pub struct ReplaceFunc {
signature: Signature,
@@ -80,33 +98,10 @@ impl ScalarUDFImpl for ReplaceFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_replace_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_replace_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_STRING,
- "Replaces all occurrences of a specified substring in a string
with a new substring.",
- "replace(str, substr, replacement)")
- .with_sql_example(r#"```sql
-> select replace('ABabbaBA', 'ab', 'cd');
-+-------------------------------------------------+
-| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) |
-+-------------------------------------------------+
-| ABcdbaBA |
-+-------------------------------------------------+
-```"#)
- .with_standard_argument("str", Some("String"))
- .with_standard_argument("substr", Some("Substring expression to
replace in the input string. Substring"))
- .with_standard_argument("replacement", Some("Replacement
substring"))
- .build()
- })
-}
-
fn replace_view(args: &[ArrayRef]) -> Result<ArrayRef> {
let string_array = as_string_view_array(&args[0])?;
let from_array = as_string_view_array(&args[1])?;
diff --git a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs
index 40bdd3ad01..9a6ee72669 100644
--- a/datafusion/functions/src/string/split_part.rs
+++ b/datafusion/functions/src/string/split_part.rs
@@ -25,12 +25,28 @@ use arrow::datatypes::DataType;
use datafusion_common::cast::as_int64_array;
use datafusion_common::ScalarValue;
use datafusion_common::{exec_err, DataFusionError, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
+use datafusion_macros::user_doc;
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
+#[user_doc(
+ doc_section(label = "String Functions"),
+ description = "Splits a string based on a specified delimiter and returns
the substring in the specified position.",
+ syntax_example = "split_part(str, delimiter, pos)",
+ sql_example = r#"```sql
+> select split_part('1.2.3.4.5', '.', 3);
++--------------------------------------------------+
+| split_part(Utf8("1.2.3.4.5"),Utf8("."),Int64(3)) |
++--------------------------------------------------+
+| 3 |
++--------------------------------------------------+
+```"#,
+ standard_argument(name = "str", prefix = "String"),
+ argument(name = "delimiter", description = "String or character to split
on."),
+ argument(name = "pos", description = "Position of the part to return.")
+)]
#[derive(Debug)]
pub struct SplitPartFunc {
signature: Signature,
@@ -182,33 +198,10 @@ impl ScalarUDFImpl for SplitPartFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_split_part_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_split_part_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_STRING,
- "Splits a string based on a specified delimiter and returns the
substring in the specified position.",
- "split_part(str, delimiter, pos)")
- .with_sql_example(r#"```sql
-> select split_part('1.2.3.4.5', '.', 3);
-+--------------------------------------------------+
-| split_part(Utf8("1.2.3.4.5"),Utf8("."),Int64(3)) |
-+--------------------------------------------------+
-| 3 |
-+--------------------------------------------------+
-```"#)
- .with_standard_argument("str", Some("String"))
- .with_argument("delimiter", "String or character to split on.")
- .with_argument("pos", "Position of the part to return.")
- .build()
- })
-}
-
/// impl
pub fn split_part_impl<'a, StringArrType, DelimiterArrType, StringArrayLen>(
string_array: StringArrType,
diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs
index 7354fda095..229982a961 100644
--- a/datafusion/functions/src/string/starts_with.rs
+++ b/datafusion/functions/src/string/starts_with.rs
@@ -16,16 +16,16 @@
// under the License.
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
use arrow::array::ArrayRef;
use arrow::datatypes::DataType;
use crate::utils::make_scalar_function;
use datafusion_common::{internal_err, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use datafusion_macros::user_doc;
/// Returns true if string starts with prefix.
/// starts_with('alphabet', 'alph') = 't'
@@ -34,6 +34,21 @@ pub fn starts_with(args: &[ArrayRef]) -> Result<ArrayRef> {
Ok(Arc::new(result) as ArrayRef)
}
+#[user_doc(
+ doc_section(label = "String Functions"),
+ description = "Tests if a string starts with a substring.",
+ syntax_example = "starts_with(str, substr)",
+ sql_example = r#"```sql
+> select starts_with('datafusion','data');
++----------------------------------------------+
+| starts_with(Utf8("datafusion"),Utf8("data")) |
++----------------------------------------------+
+| true |
++----------------------------------------------+
+```"#,
+ standard_argument(name = "str", prefix = "String"),
+ argument(name = "substr", description = "Substring to test for.")
+)]
#[derive(Debug)]
pub struct StartsWithFunc {
signature: Signature,
@@ -84,35 +99,10 @@ impl ScalarUDFImpl for StartsWithFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_starts_with_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_starts_with_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_STRING,
- "Tests if a string starts with a substring.",
- "starts_with(str, substr)",
- )
- .with_sql_example(
- r#"```sql
-> select starts_with('datafusion','data');
-+----------------------------------------------+
-| starts_with(Utf8("datafusion"),Utf8("data")) |
-+----------------------------------------------+
-| true |
-+----------------------------------------------+
-```"#,
- )
- .with_standard_argument("str", Some("String"))
- .with_argument("substr", "Substring to test for.")
- .build()
- })
-}
-
#[cfg(test)]
mod tests {
use crate::utils::test::test_function;
diff --git a/datafusion/functions/src/string/to_hex.rs b/datafusion/functions/src/string/to_hex.rs
index 04907af14a..64654ef6ef 100644
--- a/datafusion/functions/src/string/to_hex.rs
+++ b/datafusion/functions/src/string/to_hex.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
use arrow::datatypes::{
@@ -27,9 +27,10 @@ use crate::utils::make_scalar_function;
use datafusion_common::cast::as_primitive_array;
use datafusion_common::Result;
use datafusion_common::{exec_err, plan_err};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
+
use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use datafusion_macros::user_doc;
/// Converts the number to its equivalent hexadecimal representation.
/// to_hex(2147483647) = '7fffffff'
@@ -59,6 +60,20 @@ where
Ok(Arc::new(result) as ArrayRef)
}
+#[user_doc(
+ doc_section(label = "String Functions"),
+ description = "Converts an integer to a hexadecimal string.",
+ syntax_example = "to_hex(int)",
+ sql_example = r#"```sql
+> select to_hex(12345689);
++-------------------------+
+| to_hex(Int64(12345689)) |
++-------------------------+
+| bc6159 |
++-------------------------+
+```"#,
+ standard_argument(name = "int", prefix = "Integer")
+)]
#[derive(Debug)]
pub struct ToHexFunc {
signature: Signature,
@@ -116,34 +131,10 @@ impl ScalarUDFImpl for ToHexFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_to_hex_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_to_hex_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_STRING,
- "Converts an integer to a hexadecimal string.",
- "to_hex(int)",
- )
- .with_sql_example(
- r#"```sql
-> select to_hex(12345689);
-+-------------------------+
-| to_hex(Int64(12345689)) |
-+-------------------------+
-| bc6159 |
-+-------------------------+
-```"#,
- )
- .with_standard_argument("int", Some("Integer"))
- .build()
- })
-}
-
#[cfg(test)]
mod tests {
use arrow::array::{Int32Array, StringArray};
diff --git a/datafusion/functions/src/string/uuid.rs b/datafusion/functions/src/string/uuid.rs
index 6048a70bd8..f6d6a94106 100644
--- a/datafusion/functions/src/string/uuid.rs
+++ b/datafusion/functions/src/string/uuid.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
use arrow::array::GenericStringArray;
use arrow::datatypes::DataType;
@@ -24,10 +24,23 @@ use arrow::datatypes::DataType::Utf8;
use uuid::Uuid;
use datafusion_common::{internal_err, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
+use datafusion_macros::user_doc;
+#[user_doc(
+ doc_section(label = "String Functions"),
+ description = "Returns [`UUID v4`](https://en.wikipedia.org/wiki/Universally_unique_identifier#Version_4_(random)) string value which is unique per row.",
+ syntax_example = "uuid()",
+ sql_example = r#"```sql
+> select uuid();
++--------------------------------------+
+| uuid() |
++--------------------------------------+
+| 6ec17ef8-1934-41cc-8d59-d0c8f9eea1f0 |
++--------------------------------------+
+```"#
+)]
#[derive(Debug)]
pub struct UuidFunc {
signature: Signature,
@@ -80,26 +93,6 @@ impl ScalarUDFImpl for UuidFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_uuid_doc())
+ self.doc()
}
}
-
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_uuid_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_STRING,
- "Returns [`UUID
v4`](https://en.wikipedia.org/wiki/Universally_unique_identifier#Version_4_(random))
string value which is unique per row.",
- "uuid()")
- .with_sql_example(r#"```sql
-> select uuid();
-+--------------------------------------+
-| uuid() |
-+--------------------------------------+
-| 6ec17ef8-1934-41cc-8d59-d0c8f9eea1f0 |
-+--------------------------------------+
-```"#)
- .build()
- })
-}
diff --git a/datafusion/functions/src/unicode/find_in_set.rs b/datafusion/functions/src/unicode/find_in_set.rs
index 38efb408c1..c4d9b51f60 100644
--- a/datafusion/functions/src/unicode/find_in_set.rs
+++ b/datafusion/functions/src/unicode/find_in_set.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
use arrow::array::{
ArrayAccessor, ArrayIter, ArrayRef, ArrowPrimitiveType, AsArray,
OffsetSizeTrait,
@@ -26,12 +26,30 @@ use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
use crate::utils::{make_scalar_function, utf8_to_int_type};
use datafusion_common::{exec_err, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
+use datafusion_macros::user_doc;
+#[user_doc(
+ doc_section(label = "String Functions"),
+ description = "Returns a value in the range of 1 to N if the string str is
in the string list strlist consisting of N substrings.",
+ syntax_example = "find_in_set(str, strlist)",
+ sql_example = r#"```sql
+> select find_in_set('b', 'a,b,c,d');
++----------------------------------------+
+| find_in_set(Utf8("b"),Utf8("a,b,c,d")) |
++----------------------------------------+
+| 2 |
++----------------------------------------+
+```"#,
+ argument(name = "str", description = "String expression to find in
strlist."),
+ argument(
+ name = "strlist",
+ description = "A string list is a string composed of substrings
separated by , characters."
+ )
+)]
#[derive(Debug)]
pub struct FindInSetFunc {
signature: Signature,
@@ -85,32 +103,10 @@ impl ScalarUDFImpl for FindInSetFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_find_in_set_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_find_in_set_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_STRING,
- "Returns a value in the range of 1 to N if the string str is in
the string list strlist consisting of N substrings.",
- "find_in_set(str, strlist)")
- .with_sql_example(r#"```sql
-> select find_in_set('b', 'a,b,c,d');
-+----------------------------------------+
-| find_in_set(Utf8("b"),Utf8("a,b,c,d")) |
-+----------------------------------------+
-| 2 |
-+----------------------------------------+
-```"#)
- .with_argument("str", "String expression to find in strlist.")
- .with_argument("strlist", "A string list is a string composed of
substrings separated by , characters.")
- .build()
- })
-}
-
///Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings
///A string list is a string composed of substrings separated by , characters.
fn find_in_set(args: &[ArrayRef]) -> Result<ArrayRef> {
diff --git a/datafusion/functions/src/unicode/reverse.rs b/datafusion/functions/src/unicode/reverse.rs
index 8e3cf8845f..5ad347ed96 100644
--- a/datafusion/functions/src/unicode/reverse.rs
+++ b/datafusion/functions/src/unicode/reverse.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use arrow::array::{
@@ -25,12 +25,26 @@ use arrow::array::{
};
use arrow::datatypes::DataType;
use datafusion_common::{exec_err, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
+use datafusion_macros::user_doc;
use DataType::{LargeUtf8, Utf8, Utf8View};
+#[user_doc(
+ doc_section(label = "String Functions"),
+ description = "Reverses the character order of a string.",
+ syntax_example = "reverse(str)",
+ sql_example = r#"```sql
+> select reverse('datafusion');
++-----------------------------+
+| reverse(Utf8("datafusion")) |
++-----------------------------+
+| noisufatad |
++-----------------------------+
+```"#,
+ standard_argument(name = "str", prefix = "String")
+)]
#[derive(Debug)]
pub struct ReverseFunc {
signature: Signature,
@@ -87,34 +101,10 @@ impl ScalarUDFImpl for ReverseFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_reverse_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_reverse_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_STRING,
- "Reverses the character order of a string.",
- "reverse(str)",
- )
- .with_sql_example(
- r#"```sql
-> select reverse('datafusion');
-+-----------------------------+
-| reverse(Utf8("datafusion")) |
-+-----------------------------+
-| noisufatad |
-+-----------------------------+
-```"#,
- )
- .with_standard_argument("str", Some("String"))
- .build()
- })
-}
-
/// Reverses the order of the characters in the string.
/// reverse('abcde') = 'edcba'
/// The implementation uses UTF-8 code points as characters
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]