This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 5360d206f3 Migrate documentation for all string functions from
scalar_functions.md to code (#12775)
5360d206f3 is described below
commit 5360d206f39253f80179214e67dbac3f339b26ce
Author: Bruce Ritchie <[email protected]>
AuthorDate: Mon Oct 7 13:28:08 2024 -0400
Migrate documentation for all string functions from scalar_functions.md to
code (#12775)
* Added documentation for string and unicode functions.
* Fixed issues with aliases.
* Cargo fmt.
* Minor doc fixes.
* Update docs for var_pop/samp
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/core/src/bin/print_functions_docs.rs | 9 +-
datafusion/functions/src/string/ascii.rs | 51 +-
datafusion/functions/src/string/bit_length.rs | 40 +-
datafusion/functions/src/string/btrim.rs | 41 +-
datafusion/functions/src/string/chr.rs | 39 +-
datafusion/functions/src/string/concat.rs | 40 +-
datafusion/functions/src/string/concat_ws.rs | 44 +-
datafusion/functions/src/string/contains.rs | 36 +-
datafusion/functions/src/string/ends_with.rs | 43 +-
datafusion/functions/src/string/initcap.rs | 36 +-
datafusion/functions/src/string/levenshtein.rs | 32 +-
datafusion/functions/src/string/lower.rs | 41 +-
datafusion/functions/src/string/ltrim.rs | 47 +-
datafusion/functions/src/string/octet_length.rs | 40 +-
datafusion/functions/src/string/overlay.rs | 37 +-
datafusion/functions/src/string/repeat.rs | 41 +-
datafusion/functions/src/string/replace.rs | 36 +-
datafusion/functions/src/string/rtrim.rs | 47 +-
datafusion/functions/src/string/split_part.rs | 36 +-
datafusion/functions/src/string/starts_with.rs | 37 +-
datafusion/functions/src/string/to_hex.rs | 36 +-
datafusion/functions/src/string/upper.rs | 34 +-
datafusion/functions/src/string/uuid.rs | 30 +-
.../functions/src/unicode/character_length.rs | 37 +-
datafusion/functions/src/unicode/find_in_set.rs | 37 +-
datafusion/functions/src/unicode/left.rs | 38 +-
datafusion/functions/src/unicode/lpad.rs | 41 +-
datafusion/functions/src/unicode/reverse.rs | 38 +-
datafusion/functions/src/unicode/right.rs | 35 +-
datafusion/functions/src/unicode/rpad.rs | 50 +-
datafusion/functions/src/unicode/strpos.rs | 34 +-
datafusion/functions/src/unicode/substr.rs | 35 +-
datafusion/functions/src/unicode/substrindex.rs | 46 +-
datafusion/functions/src/unicode/translate.rs | 35 +-
.../user-guide/sql/aggregate_functions_new.md | 51 +-
docs/source/user-guide/sql/scalar_functions.md | 626 +-------------
docs/source/user-guide/sql/scalar_functions_new.md | 927 ++++++++++++++++++++-
37 files changed, 2076 insertions(+), 827 deletions(-)
diff --git a/datafusion/core/src/bin/print_functions_docs.rs
b/datafusion/core/src/bin/print_functions_docs.rs
index 92737b244a..53cfe94eca 100644
--- a/datafusion/core/src/bin/print_functions_docs.rs
+++ b/datafusion/core/src/bin/print_functions_docs.rs
@@ -130,13 +130,14 @@ fn print_docs(
.find(|f| f.get_name() == name ||
f.get_aliases().contains(&name))
.unwrap();
- let name = f.get_name();
let aliases = f.get_aliases();
let documentation = f.get_documentation();
// if this name is an alias we need to display what it's an alias
of
if aliases.contains(&name) {
- let _ = write!(docs, "_Alias of [{name}](#{name})._");
+ let fname = f.get_name();
+ let _ = writeln!(docs, r#"### `{name}`"#);
+ let _ = writeln!(docs, "_Alias of [{fname}](#{fname})._");
continue;
}
@@ -183,10 +184,10 @@ fn print_docs(
// next, aliases
if !f.get_aliases().is_empty() {
- let _ = write!(docs, "#### Aliases");
+ let _ = writeln!(docs, "#### Aliases");
for alias in f.get_aliases() {
- let _ = writeln!(docs, "- {alias}");
+ let _ = writeln!(docs, "- {}", alias.replace("_",
r#"\_"#));
}
}
diff --git a/datafusion/functions/src/string/ascii.rs
b/datafusion/functions/src/string/ascii.rs
index d01c6631e9..1e828d0667 100644
--- a/datafusion/functions/src/string/ascii.rs
+++ b/datafusion/functions/src/string/ascii.rs
@@ -26,24 +26,6 @@ use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
use std::sync::{Arc, OnceLock};
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_ascii_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder()
- .with_doc_section(DOC_SECTION_STRING)
- .with_description("Returns the ASCII value of the first character
in a string.")
- .with_syntax_example("ascii(str)")
- .with_argument(
- "str",
- "String expression to operate on. Can be a constant, column,
or function that evaluates to or can be coerced to a Utf8, LargeUtf8 or a
Utf8View.",
- )
- .with_related_udf("chr")
- .build()
- .unwrap()
- })
-}
-
#[derive(Debug)]
pub struct AsciiFunc {
signature: Signature,
@@ -96,6 +78,39 @@ impl ScalarUDFImpl for AsciiFunc {
}
}
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_ascii_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description(
+ "Returns the Unicode character code of the first character in
a string.",
+ )
+ .with_syntax_example("ascii(str)")
+ .with_sql_example(
+ r#"```sql
+> select ascii('abc');
++--------------------+
+| ascii(Utf8("abc")) |
++--------------------+
+| 97 |
++--------------------+
+> select ascii('🚀');
++-------------------+
+| ascii(Utf8("🚀")) |
++-------------------+
+| 128640 |
++-------------------+
+```"#,
+ )
+ .with_standard_argument("str", "String")
+ .with_related_udf("chr")
+ .build()
+ .unwrap()
+ })
+}
+
fn calculate_ascii<'a, V>(array: V) -> Result<ArrayRef, ArrowError>
where
V: ArrayAccessor<Item = &'a str>,
diff --git a/datafusion/functions/src/string/bit_length.rs
b/datafusion/functions/src/string/bit_length.rs
index 65ec1a4a77..bd22c1504b 100644
--- a/datafusion/functions/src/string/bit_length.rs
+++ b/datafusion/functions/src/string/bit_length.rs
@@ -15,17 +15,17 @@
// specific language governing permissions and limitations
// under the License.
-use std::any::Any;
-
use arrow::compute::kernels::length::bit_length;
use arrow::datatypes::DataType;
+use std::any::Any;
+use std::sync::OnceLock;
+use crate::utils::utf8_to_int_type;
use datafusion_common::{exec_err, Result, ScalarValue};
-use datafusion_expr::{ColumnarValue, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
+use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
-use crate::utils::utf8_to_int_type;
-
#[derive(Debug)]
pub struct BitLengthFunc {
signature: Signature,
@@ -88,4 +88,34 @@ impl ScalarUDFImpl for BitLengthFunc {
},
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_bit_length_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_bit_length_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Returns the bit length of a string.")
+ .with_syntax_example("bit_length(str)")
+ .with_sql_example(
+ r#"```sql
+> select bit_length('datafusion');
++--------------------------------+
+| bit_length(Utf8("datafusion")) |
++--------------------------------+
+| 80 |
++--------------------------------+
+```"#,
+ )
+ .with_standard_argument("str", "String")
+ .with_related_udf("length")
+ .with_related_udf("octet_length")
+ .build()
+ .unwrap()
+ })
}
diff --git a/datafusion/functions/src/string/btrim.rs
b/datafusion/functions/src/string/btrim.rs
index 0e992ff27f..b2e79a7b89 100644
--- a/datafusion/functions/src/string/btrim.rs
+++ b/datafusion/functions/src/string/btrim.rs
@@ -15,18 +15,18 @@
// specific language governing permissions and limitations
// under the License.
+use crate::string::common::*;
+use crate::utils::{make_scalar_function, utf8_to_str_type};
use arrow::array::{ArrayRef, OffsetSizeTrait};
use arrow::datatypes::DataType;
-use std::any::Any;
-
use datafusion_common::{exec_err, Result};
use datafusion_expr::function::Hint;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
-
-use crate::string::common::*;
-use crate::utils::{make_scalar_function, utf8_to_str_type};
+use std::any::Any;
+use std::sync::OnceLock;
/// Returns the longest string with leading and trailing characters removed.
If the characters are not specified, whitespace is removed.
/// btrim('xyxtrimyyx', 'xyz') = 'trim'
@@ -109,6 +109,35 @@ impl ScalarUDFImpl for BTrimFunc {
fn aliases(&self) -> &[String] {
&self.aliases
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_btrim_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_btrim_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Trims the specified trim string from the start
and end of a string. If no trim string is provided, all whitespace is removed
from the start and end of the input string.")
+ .with_syntax_example("btrim(str[, trim_str])")
+ .with_sql_example(r#"```sql
+> select btrim('__datafusion____', '_');
++-------------------------------------------+
+| btrim(Utf8("__datafusion____"),Utf8("_")) |
++-------------------------------------------+
+| datafusion |
++-------------------------------------------+
+```"#)
+ .with_standard_argument("str", "String")
+ .with_argument("trim_str", "String expression to trim from the
beginning and end of the input string. Can be a constant, column, or function,
and any combination of operators. _Default is whitespace characters._")
+ .with_related_udf("ltrim")
+ .with_related_udf("rtrim")
+ .build()
+ .unwrap()
+ })
}
#[cfg(test)]
diff --git a/datafusion/functions/src/string/chr.rs
b/datafusion/functions/src/string/chr.rs
index 4da7dc0159..ae0900af37 100644
--- a/datafusion/functions/src/string/chr.rs
+++ b/datafusion/functions/src/string/chr.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::ArrayRef;
use arrow::array::StringArray;
@@ -24,13 +24,13 @@ use arrow::datatypes::DataType;
use arrow::datatypes::DataType::Int64;
use arrow::datatypes::DataType::Utf8;
+use crate::utils::make_scalar_function;
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
-use datafusion_expr::{ColumnarValue, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
+use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
-use crate::utils::make_scalar_function;
-
/// Returns the character with the given code. chr(0) is disallowed because
text data types cannot store that character.
/// chr(65) = 'A'
pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
@@ -99,4 +99,35 @@ impl ScalarUDFImpl for ChrFunc {
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
make_scalar_function(chr, vec![])(args)
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_chr_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_chr_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description(
+ "Returns the character with the specified ASCII or Unicode
code value.",
+ )
+ .with_syntax_example("chr(expression)")
+ .with_sql_example(
+ r#"```sql
+> select chr(128640);
++--------------------+
+| chr(Int64(128640)) |
++--------------------+
+| 🚀 |
++--------------------+
+```"#,
+ )
+ .with_standard_argument("expression", "Integer")
+ .with_related_udf("ascii")
+ .build()
+ .unwrap()
+ })
}
diff --git a/datafusion/functions/src/string/concat.rs
b/datafusion/functions/src/string/concat.rs
index 98f57efef9..228fcd460c 100644
--- a/datafusion/functions/src/string/concat.rs
+++ b/datafusion/functions/src/string/concat.rs
@@ -18,18 +18,18 @@
use arrow::array::{as_largestring_array, Array};
use arrow::datatypes::DataType;
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
+use crate::string::common::*;
+use crate::string::concat;
use datafusion_common::cast::{as_string_array, as_string_view_array};
use datafusion_common::{internal_err, plan_err, Result, ScalarValue};
use datafusion_expr::expr::ScalarFunction;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
-use datafusion_expr::{lit, ColumnarValue, Expr, Volatility};
+use datafusion_expr::{lit, ColumnarValue, Documentation, Expr, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
-use crate::string::common::*;
-use crate::string::concat;
-
#[derive(Debug)]
pub struct ConcatFunc {
signature: Signature,
@@ -244,6 +244,36 @@ impl ScalarUDFImpl for ConcatFunc {
) -> Result<ExprSimplifyResult> {
simplify_concat(args)
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_concat_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_concat_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Concatenates multiple strings together.")
+ .with_syntax_example("concat(str[, ..., str_n])")
+ .with_sql_example(
+ r#"```sql
+> select concat('data', 'f', 'us', 'ion');
++-------------------------------------------------------+
+| concat(Utf8("data"),Utf8("f"),Utf8("us"),Utf8("ion")) |
++-------------------------------------------------------+
+| datafusion |
++-------------------------------------------------------+
+```"#,
+ )
+ .with_standard_argument("str", "String")
+ .with_argument("str_n", "Subsequent string expressions to
concatenate.")
+ .with_related_udf("concat_ws")
+ .build()
+ .unwrap()
+ })
}
pub fn simplify_concat(args: Vec<Expr>) -> Result<ExprSimplifyResult> {
diff --git a/datafusion/functions/src/string/concat_ws.rs
b/datafusion/functions/src/string/concat_ws.rs
index 1134c525cf..a20cbf1a16 100644
--- a/datafusion/functions/src/string/concat_ws.rs
+++ b/datafusion/functions/src/string/concat_ws.rs
@@ -17,7 +17,7 @@
use arrow::array::{as_largestring_array, Array, StringArray};
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::datatypes::DataType;
@@ -27,8 +27,9 @@ use crate::string::concat_ws;
use datafusion_common::cast::{as_string_array, as_string_view_array};
use datafusion_common::{exec_err, internal_err, plan_err, Result, ScalarValue};
use datafusion_expr::expr::ScalarFunction;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
-use datafusion_expr::{lit, ColumnarValue, Expr, Volatility};
+use datafusion_expr::{lit, ColumnarValue, Documentation, Expr, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
#[derive(Debug)]
@@ -264,6 +265,45 @@ impl ScalarUDFImpl for ConcatWsFunc {
_ => Ok(ExprSimplifyResult::Original(args)),
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_concat_ws_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_concat_ws_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description(
+ "Concatenates multiple strings together with a specified
separator.",
+ )
+ .with_syntax_example("concat_ws(separator, str[, ..., str_n])")
+ .with_sql_example(
+ r#"```sql
+> select concat_ws('_', 'data', 'fusion');
++--------------------------------------------------+
+| concat_ws(Utf8("_"),Utf8("data"),Utf8("fusion")) |
++--------------------------------------------------+
+| data_fusion |
++--------------------------------------------------+
+```"#,
+ )
+ .with_argument(
+ "separator",
+ "Separator to insert between concatenated strings.",
+ )
+ .with_standard_argument("str", "String")
+ .with_argument(
+ "str_n",
+ "Subsequent string expressions to concatenate.",
+ )
+ .with_related_udf("concat")
+ .build()
+ .unwrap()
+ })
}
fn simplify_concat_ws(delimiter: &Expr, args: &[Expr]) ->
Result<ExprSimplifyResult> {
diff --git a/datafusion/functions/src/string/contains.rs
b/datafusion/functions/src/string/contains.rs
index 722451ab53..7fc1fa876c 100644
--- a/datafusion/functions/src/string/contains.rs
+++ b/datafusion/functions/src/string/contains.rs
@@ -23,13 +23,14 @@ use arrow::datatypes::DataType::{Boolean, LargeUtf8, Utf8,
Utf8View};
use datafusion_common::exec_err;
use datafusion_common::DataFusionError;
use datafusion_common::Result;
-use datafusion_expr::ScalarUDFImpl;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{ColumnarValue, Signature, Volatility};
+use datafusion_expr::{Documentation, ScalarUDFImpl};
use arrow::compute::regexp_is_match;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
#[derive(Debug)]
pub struct ContainsFunc {
@@ -84,6 +85,37 @@ impl ScalarUDFImpl for ContainsFunc {
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
make_scalar_function(contains, vec![])(args)
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_contains_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_contains_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description(
+ "Returns true if search_str is found within str
(case-sensitive).",
+ )
+ .with_syntax_example("contains(str, search_str)")
+ .with_sql_example(
+ r#"```sql
+> select contains('the quick brown fox', 'row');
++---------------------------------------------------+
+| contains(Utf8("the quick brown fox"),Utf8("row")) |
++---------------------------------------------------+
+| true |
++---------------------------------------------------+
+```"#,
+ )
+ .with_standard_argument("str", "String")
+ .with_argument("search_str", "The string to search for in str.")
+ .build()
+ .unwrap()
+ })
}
/// use regexp_is_match_utf8_scalar to do the calculation for contains
diff --git a/datafusion/functions/src/string/ends_with.rs
b/datafusion/functions/src/string/ends_with.rs
index 03a1795954..786010764c 100644
--- a/datafusion/functions/src/string/ends_with.rs
+++ b/datafusion/functions/src/string/ends_with.rs
@@ -16,18 +16,18 @@
// under the License.
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::ArrayRef;
use arrow::datatypes::DataType;
+use crate::utils::make_scalar_function;
use datafusion_common::{internal_err, Result};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
-use crate::utils::make_scalar_function;
-
#[derive(Debug)]
pub struct EndsWithFunc {
signature: Signature,
@@ -84,6 +84,41 @@ impl ScalarUDFImpl for EndsWithFunc {
}
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_ends_with_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_ends_with_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Tests if a string ends with a substring.")
+ .with_syntax_example("ends_with(str, substr)")
+ .with_sql_example(
+ r#"```sql
+> select ends_with('datafusion', 'soin');
++--------------------------------------------+
+| ends_with(Utf8("datafusion"),Utf8("soin")) |
++--------------------------------------------+
+| false |
++--------------------------------------------+
+> select ends_with('datafusion', 'sion');
++--------------------------------------------+
+| ends_with(Utf8("datafusion"),Utf8("sion")) |
++--------------------------------------------+
+| true |
++--------------------------------------------+
+```"#,
+ )
+ .with_standard_argument("str", "String")
+ .with_argument("substr", "Substring to test for.")
+ .build()
+ .unwrap()
+ })
}
/// Returns true if string ends with suffix.
diff --git a/datafusion/functions/src/string/initcap.rs
b/datafusion/functions/src/string/initcap.rs
index 4e1eb213ef..ffd60bb6e9 100644
--- a/datafusion/functions/src/string/initcap.rs
+++ b/datafusion/functions/src/string/initcap.rs
@@ -16,18 +16,18 @@
// under the License.
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait, StringArray};
use arrow::datatypes::DataType;
+use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
use datafusion_common::{exec_err, Result};
-use datafusion_expr::{ColumnarValue, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
+use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
-use crate::utils::{make_scalar_function, utf8_to_str_type};
-
#[derive(Debug)]
pub struct InitcapFunc {
signature: Signature,
@@ -79,6 +79,34 @@ impl ScalarUDFImpl for InitcapFunc {
}
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_initcap_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_initcap_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Capitalizes the first character in each word in
the input string. Words are delimited by non-alphanumeric characters.")
+ .with_syntax_example("initcap(str)")
+ .with_sql_example(r#"```sql
+> select initcap('apache datafusion');
++------------------------------------+
+| initcap(Utf8("apache datafusion")) |
++------------------------------------+
+| Apache Datafusion |
++------------------------------------+
+```"#)
+ .with_standard_argument("str", "String")
+ .with_related_udf("lower")
+ .with_related_udf("upper")
+ .build()
+ .unwrap()
+ })
}
/// Converts the first letter of each word to upper case and the rest to lower
case. Words are sequences of alphanumeric characters separated by
non-alphanumeric characters.
diff --git a/datafusion/functions/src/string/levenshtein.rs
b/datafusion/functions/src/string/levenshtein.rs
index 430c402a50..2f121426f1 100644
--- a/datafusion/functions/src/string/levenshtein.rs
+++ b/datafusion/functions/src/string/levenshtein.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::{ArrayRef, Int32Array, Int64Array, OffsetSizeTrait};
use arrow::datatypes::DataType;
@@ -25,8 +25,9 @@ use crate::utils::{make_scalar_function, utf8_to_int_type};
use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
use datafusion_common::utils::datafusion_strsim;
use datafusion_common::{exec_err, Result};
-use datafusion_expr::ColumnarValue;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::*;
+use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
#[derive(Debug)]
@@ -83,6 +84,33 @@ impl ScalarUDFImpl for LevenshteinFunc {
}
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_levenshtein_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_levenshtein_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Returns the [`Levenshtein
distance`](https://en.wikipedia.org/wiki/Levenshtein_distance) between the two
given strings.")
+ .with_syntax_example("levenshtein(str1, str2)")
+ .with_sql_example(r#"```sql
+> select levenshtein('kitten', 'sitting');
++---------------------------------------------+
+| levenshtein(Utf8("kitten"),Utf8("sitting")) |
++---------------------------------------------+
+| 3 |
++---------------------------------------------+
+```"#)
+ .with_argument("str1", "String expression to compute Levenshtein
distance with str2.")
+ .with_argument("str2", "String expression to compute Levenshtein
distance with str1.")
+ .build()
+ .unwrap()
+ })
}
///Returns the Levenshtein distance between the two given strings.
diff --git a/datafusion/functions/src/string/lower.rs
b/datafusion/functions/src/string/lower.rs
index ca324e69c0..25acfc2760 100644
--- a/datafusion/functions/src/string/lower.rs
+++ b/datafusion/functions/src/string/lower.rs
@@ -15,16 +15,16 @@
// specific language governing permissions and limitations
// under the License.
-use std::any::Any;
-
use arrow::datatypes::DataType;
-
-use datafusion_common::Result;
-use datafusion_expr::ColumnarValue;
-use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+use std::sync::OnceLock;
use crate::string::common::to_lower;
use crate::utils::utf8_to_str_type;
+use datafusion_common::Result;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
+use datafusion_expr::{ColumnarValue, Documentation};
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
#[derive(Debug)]
pub struct LowerFunc {
@@ -70,8 +70,37 @@ impl ScalarUDFImpl for LowerFunc {
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
to_lower(args, "lower")
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_lower_doc())
+ }
}
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_lower_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Converts a string to lower-case.")
+ .with_syntax_example("lower(str)")
+ .with_sql_example(
+ r#"```sql
+> select lower('Ångström');
++-------------------------+
+| lower(Utf8("Ångström")) |
++-------------------------+
+| ångström |
++-------------------------+
+```"#,
+ )
+ .with_standard_argument("str", "String")
+ .with_related_udf("initcap")
+ .with_related_udf("upper")
+ .build()
+ .unwrap()
+ })
+}
#[cfg(test)]
mod tests {
use super::*;
diff --git a/datafusion/functions/src/string/ltrim.rs
b/datafusion/functions/src/string/ltrim.rs
index 0ddb5a205b..1fcde9e97a 100644
--- a/datafusion/functions/src/string/ltrim.rs
+++ b/datafusion/functions/src/string/ltrim.rs
@@ -15,20 +15,20 @@
// specific language governing permissions and limitations
// under the License.
-use std::any::Any;
-
use arrow::array::{ArrayRef, OffsetSizeTrait};
use arrow::datatypes::DataType;
+use std::any::Any;
+use std::sync::OnceLock;
+use crate::string::common::*;
+use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::{exec_err, Result};
use datafusion_expr::function::Hint;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
-use crate::string::common::*;
-use crate::utils::{make_scalar_function, utf8_to_str_type};
-
/// Returns the longest string with leading characters removed. If the
characters are not specified, whitespace is removed.
/// ltrim('zzzytest', 'xyz') = 'test'
fn ltrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
@@ -104,6 +104,41 @@ impl ScalarUDFImpl for LtrimFunc {
),
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_ltrim_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_ltrim_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Trims the specified trim string from the
beginning of a string. If no trim string is provided, all whitespace is removed
from the start of the input string.")
+ .with_syntax_example("ltrim(str[, trim_str])")
+ .with_sql_example(r#"```sql
+> select ltrim(' datafusion ');
++-------------------------------+
+| ltrim(Utf8(" datafusion ")) |
++-------------------------------+
+| datafusion |
++-------------------------------+
+> select ltrim('___datafusion___', '_');
++-------------------------------------------+
+| ltrim(Utf8("___datafusion___"),Utf8("_")) |
++-------------------------------------------+
+| datafusion___ |
++-------------------------------------------+
+```"#)
+ .with_standard_argument("str", "String")
+ .with_argument("trim_str", "String expression to trim from the
beginning of the input string. Can be a constant, column, or function, and any
combination of arithmetic operators. _Default is whitespace characters._")
+ .with_related_udf("btrim")
+ .with_related_udf("rtrim")
+ .build()
+ .unwrap()
+ })
}
#[cfg(test)]
diff --git a/datafusion/functions/src/string/octet_length.rs
b/datafusion/functions/src/string/octet_length.rs
index f792914d86..195a6c296c 100644
--- a/datafusion/functions/src/string/octet_length.rs
+++ b/datafusion/functions/src/string/octet_length.rs
@@ -15,17 +15,17 @@
// specific language governing permissions and limitations
// under the License.
-use std::any::Any;
-
use arrow::compute::kernels::length::length;
use arrow::datatypes::DataType;
+use std::any::Any;
+use std::sync::OnceLock;
+use crate::utils::utf8_to_int_type;
use datafusion_common::{exec_err, Result, ScalarValue};
-use datafusion_expr::{ColumnarValue, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
+use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
-use crate::utils::utf8_to_int_type;
-
#[derive(Debug)]
pub struct OctetLengthFunc {
signature: Signature,
@@ -91,6 +91,36 @@ impl ScalarUDFImpl for OctetLengthFunc {
},
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_octet_length_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_octet_length_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Returns the length of a string in bytes.")
+ .with_syntax_example("octet_length(str)")
+ .with_sql_example(
+ r#"```sql
+> select octet_length('Ångström');
++--------------------------------+
+| octet_length(Utf8("Ångström")) |
++--------------------------------+
+| 10 |
++--------------------------------+
+```"#,
+ )
+ .with_standard_argument("str", "String")
+ .with_related_udf("bit_length")
+ .with_related_udf("length")
+ .build()
+ .unwrap()
+ })
}
#[cfg(test)]
diff --git a/datafusion/functions/src/string/overlay.rs
b/datafusion/functions/src/string/overlay.rs
index e285bd85b1..ec33840a0b 100644
--- a/datafusion/functions/src/string/overlay.rs
+++ b/datafusion/functions/src/string/overlay.rs
@@ -16,21 +16,21 @@
// under the License.
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
use arrow::datatypes::DataType;
+use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::{
as_generic_string_array, as_int64_array, as_string_view_array,
};
use datafusion_common::{exec_err, Result};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
-use crate::utils::{make_scalar_function, utf8_to_str_type};
-
#[derive(Debug)]
pub struct OverlayFunc {
signature: Signature,
@@ -87,6 +87,35 @@ impl ScalarUDFImpl for OverlayFunc {
other => exec_err!("Unsupported data type {other:?} for function overlay"),
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_overlay_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_overlay_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Returns the string which is replaced by another string from the specified position and specified count length.")
+ .with_syntax_example("overlay(str PLACING substr FROM pos [FOR count])")
+ .with_sql_example(r#"```sql
+> select overlay('Txxxxas' placing 'hom' from 2 for 4);
++--------------------------------------------------------+
+| overlay(Utf8("Txxxxas"),Utf8("hom"),Int64(2),Int64(4)) |
++--------------------------------------------------------+
+| Thomas |
++--------------------------------------------------------+
+```"#)
+ .with_standard_argument("str", "String")
+ .with_argument("substr", "Substring to replace in str.")
+ .with_argument("pos", "The start position to start the replace in str.")
+ .with_argument("count", "The count of characters to be replaced from start position of str. If not specified, will use substr length instead.")
+ .build()
+ .unwrap()
+ })
}
macro_rules! process_overlay {
diff --git a/datafusion/functions/src/string/repeat.rs b/datafusion/functions/src/string/repeat.rs
index 20e4462784..3abd1767bb 100644
--- a/datafusion/functions/src/string/repeat.rs
+++ b/datafusion/functions/src/string/repeat.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::{
ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array,
@@ -25,15 +25,15 @@ use arrow::array::{
use arrow::datatypes::DataType;
use arrow::datatypes::DataType::{Int64, LargeUtf8, Utf8, Utf8View};
+use crate::string::common::StringArrayType;
+use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
-use crate::string::common::StringArrayType;
-use crate::utils::{make_scalar_function, utf8_to_str_type};
-
#[derive(Debug)]
pub struct RepeatFunc {
signature: Signature,
@@ -83,6 +83,37 @@ impl ScalarUDFImpl for RepeatFunc {
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
make_scalar_function(repeat, vec![])(args)
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_repeat_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_repeat_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description(
+ "Returns a string with an input string repeated a specified number.",
+ )
+ .with_syntax_example("repeat(str, n)")
+ .with_sql_example(
+ r#"```sql
+> select repeat('data', 3);
++-------------------------------+
+| repeat(Utf8("data"),Int64(3)) |
++-------------------------------+
+| datadatadata |
++-------------------------------+
+```"#,
+ )
+ .with_standard_argument("str", "String")
+ .with_argument("n", "Number of times to repeat the input string.")
+ .build()
+ .unwrap()
+ })
}
/// Repeats string the specified number of times.
diff --git a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs
index 13fa3d5567..7c985b44ab 100644
--- a/datafusion/functions/src/string/replace.rs
+++ b/datafusion/functions/src/string/replace.rs
@@ -16,19 +16,19 @@
// under the License.
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait, StringArray};
use arrow::datatypes::DataType;
+use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
use datafusion_common::{exec_err, Result};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
-use crate::utils::{make_scalar_function, utf8_to_str_type};
-
#[derive(Debug)]
pub struct ReplaceFunc {
signature: Signature,
@@ -83,6 +83,34 @@ impl ScalarUDFImpl for ReplaceFunc {
}
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_replace_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_replace_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Replaces all occurrences of a specified substring in a string with a new substring.")
+ .with_syntax_example("replace(str, substr, replacement)")
+ .with_sql_example(r#"```sql
+> select replace('ABabbaBA', 'ab', 'cd');
++-------------------------------------------------+
+| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) |
++-------------------------------------------------+
+| ABcdbaBA |
++-------------------------------------------------+
+```"#)
+ .with_standard_argument("str", "String")
+ .with_standard_argument("substr", "Substring expression to replace in the input string. Substring expression")
+ .with_standard_argument("replacement", "Replacement substring")
+ .build()
+ .unwrap()
+ })
}
fn replace_view(args: &[ArrayRef]) -> Result<ArrayRef> {
diff --git a/datafusion/functions/src/string/rtrim.rs b/datafusion/functions/src/string/rtrim.rs
index a1aa5568ba..6743ad99d3 100644
--- a/datafusion/functions/src/string/rtrim.rs
+++ b/datafusion/functions/src/string/rtrim.rs
@@ -16,19 +16,19 @@
// under the License.
use arrow::array::{ArrayRef, OffsetSizeTrait};
-use std::any::Any;
-
use arrow::datatypes::DataType;
+use std::any::Any;
+use std::sync::OnceLock;
+use crate::string::common::*;
+use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::{exec_err, Result};
use datafusion_expr::function::Hint;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
-use crate::string::common::*;
-use crate::utils::{make_scalar_function, utf8_to_str_type};
-
/// Returns the longest string with trailing characters removed. If the characters are not specified, whitespace is removed.
/// rtrim('testxxzx', 'xyz') = 'test'
fn rtrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
@@ -104,6 +104,41 @@ impl ScalarUDFImpl for RtrimFunc {
),
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_rtrim_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_rtrim_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Trims the specified trim string from the end of a string. If no trim string is provided, all whitespace is removed from the end of the input string.")
+ .with_syntax_example("rtrim(str[, trim_str])")
+ .with_sql_example(r#"```sql
+> select rtrim(' datafusion ');
++-------------------------------+
+| rtrim(Utf8(" datafusion ")) |
++-------------------------------+
+| datafusion |
++-------------------------------+
+> select rtrim('___datafusion___', '_');
++-------------------------------------------+
+| rtrim(Utf8("___datafusion___"),Utf8("_")) |
++-------------------------------------------+
+| ___datafusion |
++-------------------------------------------+
+```"#)
+ .with_standard_argument("str", "String")
+ .with_argument("trim_str", "String expression to trim from the end of the input string. Can be a constant, column, or function, and any combination of arithmetic operators. _Default is whitespace characters._")
+ .with_related_udf("btrim")
+ .with_related_udf("ltrim")
+ .build()
+ .unwrap()
+ })
}
#[cfg(test)]
diff --git a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs
index 8d292315a3..2424103c84 100644
--- a/datafusion/functions/src/string/split_part.rs
+++ b/datafusion/functions/src/string/split_part.rs
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+use crate::utils::utf8_to_str_type;
use arrow::array::{
ArrayRef, GenericStringArray, Int64Array, OffsetSizeTrait, StringViewArray,
};
@@ -23,13 +24,12 @@ use arrow::datatypes::DataType;
use datafusion_common::cast::as_int64_array;
use datafusion_common::ScalarValue;
use datafusion_common::{exec_err, DataFusionError, Result};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::*;
-use datafusion_expr::{ColumnarValue, Volatility};
+use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use std::any::Any;
-use std::sync::Arc;
-
-use crate::utils::utf8_to_str_type;
+use std::sync::{Arc, OnceLock};
use super::common::StringArrayType;
@@ -178,6 +178,34 @@ impl ScalarUDFImpl for SplitPartFunc {
result.map(ColumnarValue::Array)
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_split_part_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_split_part_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Splits a string based on a specified delimiter and returns the substring in the specified position.")
+ .with_syntax_example("split_part(str, delimiter, pos)")
+ .with_sql_example(r#"```sql
+> select split_part('1.2.3.4.5', '.', 3);
++--------------------------------------------------+
+| split_part(Utf8("1.2.3.4.5"),Utf8("."),Int64(3)) |
++--------------------------------------------------+
+| 3 |
++--------------------------------------------------+
+```"#)
+ .with_standard_argument("str", "String")
+ .with_argument("delimiter", "String or character to split on.")
+ .with_argument("pos", "Position of the part to return.")
+ .build()
+ .unwrap()
+ })
}
/// impl
diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs
index 8450697cbf..ff4bf01c99 100644
--- a/datafusion/functions/src/string/starts_with.rs
+++ b/datafusion/functions/src/string/starts_with.rs
@@ -16,18 +16,18 @@
// under the License.
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::ArrayRef;
use arrow::datatypes::DataType;
+use crate::utils::make_scalar_function;
use datafusion_common::{internal_err, Result};
-use datafusion_expr::ColumnarValue;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::*;
+use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
-use crate::utils::make_scalar_function;
-
/// Returns true if string starts with prefix.
/// starts_with('alphabet', 'alph') = 't'
pub fn starts_with(args: &[ArrayRef]) -> Result<ArrayRef> {
@@ -89,6 +89,35 @@ impl ScalarUDFImpl for StartsWithFunc {
_ => internal_err!("Unsupported data types for starts_with. Expected Utf8, LargeUtf8 or Utf8View")?,
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_starts_with_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_starts_with_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Tests if a string starts with a substring.")
+ .with_syntax_example("starts_with(str, substr)")
+ .with_sql_example(
+ r#"```sql
+> select starts_with('datafusion','data');
++----------------------------------------------+
+| starts_with(Utf8("datafusion"),Utf8("data")) |
++----------------------------------------------+
+| true |
++----------------------------------------------+
+```"#,
+ )
+ .with_standard_argument("str", "String")
+ .with_argument("substr", "Substring to test for.")
+ .build()
+ .unwrap()
+ })
}
#[cfg(test)]
diff --git a/datafusion/functions/src/string/to_hex.rs b/datafusion/functions/src/string/to_hex.rs
index 79aa9254f9..72cd4fbffa 100644
--- a/datafusion/functions/src/string/to_hex.rs
+++ b/datafusion/functions/src/string/to_hex.rs
@@ -16,21 +16,21 @@
// under the License.
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
use arrow::datatypes::{
ArrowNativeType, ArrowPrimitiveType, DataType, Int32Type, Int64Type,
};
+use crate::utils::make_scalar_function;
use datafusion_common::cast::as_primitive_array;
use datafusion_common::Result;
use datafusion_common::{exec_err, plan_err};
-use datafusion_expr::ColumnarValue;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
+use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
-use crate::utils::make_scalar_function;
-
/// Converts the number to its equivalent hexadecimal representation.
/// to_hex(2147483647) = '7fffffff'
pub fn to_hex<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
@@ -110,6 +110,34 @@ impl ScalarUDFImpl for ToHexFunc {
other => exec_err!("Unsupported data type {other:?} for function to_hex"),
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_to_hex_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_to_hex_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Converts an integer to a hexadecimal string.")
+ .with_syntax_example("to_hex(int)")
+ .with_sql_example(
+ r#"```sql
+> select to_hex(12345689);
++-------------------------+
+| to_hex(Int64(12345689)) |
++-------------------------+
+| bc6159 |
++-------------------------+
+```"#,
+ )
+ .with_standard_argument("int", "Integer")
+ .build()
+ .unwrap()
+ })
}
#[cfg(test)]
diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs
index 593e33ab6b..caef7f6552 100644
--- a/datafusion/functions/src/string/upper.rs
+++ b/datafusion/functions/src/string/upper.rs
@@ -19,9 +19,11 @@ use crate::string::common::to_upper;
use crate::utils::utf8_to_str_type;
use arrow::datatypes::DataType;
use datafusion_common::Result;
-use datafusion_expr::ColumnarValue;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
+use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
+use std::sync::OnceLock;
#[derive(Debug)]
pub struct UpperFunc {
@@ -67,6 +69,36 @@ impl ScalarUDFImpl for UpperFunc {
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
to_upper(args, "upper")
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_upper_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_upper_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Converts a string to upper-case.")
+ .with_syntax_example("upper(str)")
+ .with_sql_example(
+ r#"```sql
+> select upper('dataFusion');
++---------------------------+
+| upper(Utf8("dataFusion")) |
++---------------------------+
+| DATAFUSION |
++---------------------------+
+```"#,
+ )
+ .with_standard_argument("str", "String")
+ .with_related_udf("initcap")
+ .with_related_udf("lower")
+ .build()
+ .unwrap()
+ })
}
#[cfg(test)]
diff --git a/datafusion/functions/src/string/uuid.rs b/datafusion/functions/src/string/uuid.rs
index 3ddc320fce..0fbdce16cc 100644
--- a/datafusion/functions/src/string/uuid.rs
+++ b/datafusion/functions/src/string/uuid.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::GenericStringArray;
use arrow::datatypes::DataType;
@@ -24,7 +24,8 @@ use arrow::datatypes::DataType::Utf8;
use uuid::Uuid;
use datafusion_common::{not_impl_err, Result};
-use datafusion_expr::{ColumnarValue, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
+use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
#[derive(Debug)]
@@ -74,4 +75,29 @@ impl ScalarUDFImpl for UuidFunc {
let array = GenericStringArray::<i32>::from_iter_values(values);
Ok(ColumnarValue::Array(Arc::new(array)))
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_uuid_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_uuid_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Returns [`UUID v4`](https://en.wikipedia.org/wiki/Universally_unique_identifier#Version_4_(random)) string value which is unique per row.")
+ .with_syntax_example("uuid()")
+ .with_sql_example(r#"```sql
+> select uuid();
++--------------------------------------+
+| uuid() |
++--------------------------------------+
+| 6ec17ef8-1934-41cc-8d59-d0c8f9eea1f0 |
++--------------------------------------+
+```"#)
+ .build()
+ .unwrap()
+ })
}
diff --git a/datafusion/functions/src/unicode/character_length.rs b/datafusion/functions/src/unicode/character_length.rs
index c9dc96b2a9..bfb60bfbe2 100644
--- a/datafusion/functions/src/unicode/character_length.rs
+++ b/datafusion/functions/src/unicode/character_length.rs
@@ -22,9 +22,12 @@ use arrow::array::{
};
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
use datafusion_common::Result;
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
+use datafusion_expr::{
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
#[derive(Debug)]
pub struct CharacterLengthFunc {
@@ -76,6 +79,36 @@ impl ScalarUDFImpl for CharacterLengthFunc {
fn aliases(&self) -> &[String] {
&self.aliases
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_character_length_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_character_length_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Returns the number of characters in a string.")
+ .with_syntax_example("character_length(str)")
+ .with_sql_example(
+ r#"```sql
+> select character_length('Ångström');
++------------------------------------+
+| character_length(Utf8("Ångström")) |
++------------------------------------+
+| 8 |
++------------------------------------+
+```"#,
+ )
+ .with_standard_argument("str", "String")
+ .with_related_udf("bit_length")
+ .with_related_udf("octet_length")
+ .build()
+ .unwrap()
+ })
}
/// Returns number of characters in the string.
diff --git a/datafusion/functions/src/unicode/find_in_set.rs b/datafusion/functions/src/unicode/find_in_set.rs
index 41a2b9d9e7..cad860e410 100644
--- a/datafusion/functions/src/unicode/find_in_set.rs
+++ b/datafusion/functions/src/unicode/find_in_set.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::{
ArrayAccessor, ArrayIter, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait,
@@ -24,11 +24,13 @@ use arrow::array::{
};
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
+use crate::utils::{make_scalar_function, utf8_to_int_type};
use datafusion_common::{exec_err, Result};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::Exact;
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
-
-use crate::utils::{make_scalar_function, utf8_to_int_type};
+use datafusion_expr::{
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
#[derive(Debug)]
pub struct FindInSetFunc {
@@ -77,6 +79,33 @@ impl ScalarUDFImpl for FindInSetFunc {
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
make_scalar_function(find_in_set, vec![])(args)
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_find_in_set_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_find_in_set_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings.")
+ .with_syntax_example("find_in_set(str, strlist)")
+ .with_sql_example(r#"```sql
+> select find_in_set('b', 'a,b,c,d');
++----------------------------------------+
+| find_in_set(Utf8("b"),Utf8("a,b,c,d")) |
++----------------------------------------+
+| 2 |
++----------------------------------------+
+```"#)
+ .with_argument("str", "String expression to find in strlist.")
+ .with_argument("strlist", "A string list is a string composed of substrings separated by , characters.")
+ .build()
+ .unwrap()
+ })
}
///Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings
diff --git a/datafusion/functions/src/unicode/left.rs b/datafusion/functions/src/unicode/left.rs
index c49784948d..6610cfb25e 100644
--- a/datafusion/functions/src/unicode/left.rs
+++ b/datafusion/functions/src/unicode/left.rs
@@ -17,7 +17,7 @@
use std::any::Any;
use std::cmp::Ordering;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::{
Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array,
@@ -25,15 +25,17 @@ use arrow::array::{
};
use arrow::datatypes::DataType;
+use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::{
as_generic_string_array, as_int64_array, as_string_view_array,
};
use datafusion_common::exec_err;
use datafusion_common::Result;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::Exact;
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
-
-use crate::utils::{make_scalar_function, utf8_to_str_type};
+use datafusion_expr::{
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
#[derive(Debug)]
pub struct LeftFunc {
@@ -91,6 +93,34 @@ impl ScalarUDFImpl for LeftFunc {
),
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_left_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_left_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Returns a specified number of characters from the left side of a string.")
+ .with_syntax_example("left(str, n)")
+ .with_sql_example(r#"```sql
+> select left('datafusion', 4);
++-----------------------------------+
+| left(Utf8("datafusion"),Int64(4)) |
++-----------------------------------+
+| data |
++-----------------------------------+
+```"#)
+ .with_standard_argument("str", "String")
+ .with_argument("n", "Number of characters to return.")
+ .with_related_udf("right")
+ .build()
+ .unwrap()
+ })
}
/// Returns first n characters in the string, or when n is negative, returns all but last |n| characters.
diff --git a/datafusion/functions/src/unicode/lpad.rs b/datafusion/functions/src/unicode/lpad.rs
index e102673c42..48bd583720 100644
--- a/datafusion/functions/src/unicode/lpad.rs
+++ b/datafusion/functions/src/unicode/lpad.rs
@@ -17,7 +17,7 @@
use std::any::Any;
use std::fmt::Write;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::{
Array, ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array,
@@ -27,13 +27,15 @@ use arrow::datatypes::DataType;
use unicode_segmentation::UnicodeSegmentation;
use DataType::{LargeUtf8, Utf8, Utf8View};
+use crate::string::common::StringArrayType;
+use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::Exact;
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
-
-use crate::string::common::StringArrayType;
-use crate::utils::{make_scalar_function, utf8_to_str_type};
+use datafusion_expr::{
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
#[derive(Debug)]
pub struct LPadFunc {
@@ -95,6 +97,35 @@ impl ScalarUDFImpl for LPadFunc {
other => exec_err!("Unsupported data type {other:?} for function lpad"),
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_lpad_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_lpad_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Pads the left side of a string with another string to a specified string length.")
+ .with_syntax_example("lpad(str, n[, padding_str])")
+ .with_sql_example(r#"```sql
+> select lpad('Dolly', 10, 'hello');
++---------------------------------------------+
+| lpad(Utf8("Dolly"),Int64(10),Utf8("hello")) |
++---------------------------------------------+
+| helloDolly |
++---------------------------------------------+
+```"#)
+ .with_standard_argument("str", "String")
+ .with_argument("n", "String length to pad to.")
+ .with_argument("padding_str", "Optional string expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._")
+ .with_related_udf("rpad")
+ .build()
+ .unwrap()
+ })
}
/// Extends the string to length 'length' by prepending the characters fill (a space by default).
diff --git a/datafusion/functions/src/unicode/reverse.rs b/datafusion/functions/src/unicode/reverse.rs
index da16d3ee37..32872c28a6 100644
--- a/datafusion/functions/src/unicode/reverse.rs
+++ b/datafusion/functions/src/unicode/reverse.rs
@@ -16,19 +16,21 @@
// under the License.
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
+use crate::utils::{make_scalar_function, utf8_to_str_type};
use arrow::array::{
Array, ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray,
OffsetSizeTrait,
};
use arrow::datatypes::DataType;
use datafusion_common::{exec_err, Result};
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
+use datafusion_expr::{
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
use DataType::{LargeUtf8, Utf8, Utf8View};
-use crate::utils::{make_scalar_function, utf8_to_str_type};
-
#[derive(Debug)]
pub struct ReverseFunc {
signature: Signature,
@@ -79,6 +81,34 @@ impl ScalarUDFImpl for ReverseFunc {
}
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_reverse_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_reverse_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Reverses the character order of a string.")
+ .with_syntax_example("reverse(str)")
+ .with_sql_example(
+ r#"```sql
+> select reverse('datafusion');
++-----------------------------+
+| reverse(Utf8("datafusion")) |
++-----------------------------+
+| noisufatad |
++-----------------------------+
+```"#,
+ )
+ .with_standard_argument("str", "String")
+ .build()
+ .unwrap()
+ })
}
/// Reverses the order of the characters in the string.
diff --git a/datafusion/functions/src/unicode/right.rs b/datafusion/functions/src/unicode/right.rs
index 9d542bb2c0..585611fe60 100644
--- a/datafusion/functions/src/unicode/right.rs
+++ b/datafusion/functions/src/unicode/right.rs
@@ -17,7 +17,7 @@
use std::any::Any;
use std::cmp::{max, Ordering};
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::{
Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array,
@@ -31,8 +31,11 @@ use datafusion_common::cast::{
};
use datafusion_common::exec_err;
use datafusion_common::Result;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::Exact;
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::{
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
#[derive(Debug)]
pub struct RightFunc {
@@ -90,6 +93,34 @@ impl ScalarUDFImpl for RightFunc {
),
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_right_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_right_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Returns a specified number of characters from the right side of a string.")
+ .with_syntax_example("right(str, n)")
+ .with_sql_example(r#"```sql
+> select right('datafusion', 6);
++------------------------------------+
+| right(Utf8("datafusion"),Int64(6)) |
++------------------------------------+
+| fusion |
++------------------------------------+
+```"#)
+ .with_standard_argument("str", "String")
+ .with_argument("n", "Number of characters to return")
+ .with_related_udf("left")
+ .build()
+ .unwrap()
+ })
}
/// Returns last n characters in the string, or when n is negative, returns all but first |n| characters.
diff --git a/datafusion/functions/src/unicode/rpad.rs b/datafusion/functions/src/unicode/rpad.rs
index 05ecff05a1..9ca65e229c 100644
--- a/datafusion/functions/src/unicode/rpad.rs
+++ b/datafusion/functions/src/unicode/rpad.rs
@@ -47,27 +47,6 @@ impl Default for RPadFunc {
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_rpad_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder()
- .with_doc_section(DOC_SECTION_STRING)
- .with_description("Pads the right side of a string with another string to a specified string length.")
- .with_syntax_example("rpad(str, n[, padding_str])")
- .with_standard_argument(
- "str",
- "String",
- )
- .with_argument("n", "String length to pad to.")
- .with_argument("padding_str",
- "String expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._")
- .with_related_udf("lpad")
- .build()
- .unwrap()
- })
-}
-
impl RPadFunc {
pub fn new() -> Self {
use DataType::*;
@@ -143,6 +122,35 @@ impl ScalarUDFImpl for RPadFunc {
}
}
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_rpad_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Pads the right side of a string with another string to a specified string length.")
+ .with_syntax_example("rpad(str, n[, padding_str])")
+ .with_sql_example(r#"```sql
+> select rpad('datafusion', 20, '_-');
++-----------------------------------------------+
+| rpad(Utf8("datafusion"),Int64(20),Utf8("_-")) |
++-----------------------------------------------+
+| datafusion_-_-_-_-_- |
++-----------------------------------------------+
+```"#)
+ .with_standard_argument(
+ "str",
+ "String",
+ )
+ .with_argument("n", "String length to pad to.")
+ .with_argument("padding_str",
+ "String expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._")
+ .with_related_udf("lpad")
+ .build()
+ .unwrap()
+ })
+}
+
pub fn rpad<StringArrayLen: OffsetSizeTrait, FillArrayLen: OffsetSizeTrait>(
args: &[ArrayRef],
) -> Result<ArrayRef> {
diff --git a/datafusion/functions/src/unicode/strpos.rs b/datafusion/functions/src/unicode/strpos.rs
index 6da67c8a27..eaff62c338 100644
--- a/datafusion/functions/src/unicode/strpos.rs
+++ b/datafusion/functions/src/unicode/strpos.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::{ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray};
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
@@ -24,8 +24,11 @@ use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
use crate::string::common::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_int_type};
use datafusion_common::{exec_err, Result};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::Exact;
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::{
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
#[derive(Debug)]
pub struct StrposFunc {
@@ -84,6 +87,33 @@ impl ScalarUDFImpl for StrposFunc {
fn aliases(&self) -> &[String] {
&self.aliases
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_strpos_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_strpos_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Returns the starting position of a specified substring in a string. Positions begin at 1. If the substring does not exist in the string, the function returns 0.")
+ .with_syntax_example("strpos(str, substr)")
+ .with_sql_example(r#"```sql
+> select strpos('datafusion', 'fus');
++----------------------------------------+
+| strpos(Utf8("datafusion"),Utf8("fus")) |
++----------------------------------------+
+| 5 |
++----------------------------------------+
+```"#)
+ .with_standard_argument("str", "String")
+ .with_argument("substr", "Substring expression to search for.")
+ .build()
+ .unwrap()
+ })
}
fn strpos(args: &[ArrayRef]) -> Result<ArrayRef> {
diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs
index 205de0b30b..c253ef7e03 100644
--- a/datafusion/functions/src/unicode/substr.rs
+++ b/datafusion/functions/src/unicode/substr.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use crate::string::common::{make_and_append_view, StringArrayType};
use crate::utils::{make_scalar_function, utf8_to_str_type};
@@ -28,7 +28,10 @@ use arrow::datatypes::DataType;
use arrow_buffer::{NullBufferBuilder, ScalarBuffer};
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, plan_err, Result};
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
+use datafusion_expr::{
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
#[derive(Debug)]
pub struct SubstrFunc {
@@ -138,6 +141,34 @@ impl ScalarUDFImpl for SubstrFunc {
])
}
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_substr_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_substr_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Extracts a substring of a specified number of characters from a specific starting position in a string.")
+ .with_syntax_example("substr(str, start_pos[, length])")
+ .with_sql_example(r#"```sql
+> select substr('datafusion', 5, 3);
++----------------------------------------------+
+| substr(Utf8("datafusion"),Int64(5),Int64(3)) |
++----------------------------------------------+
+| fus |
++----------------------------------------------+
+```"#)
+ .with_standard_argument("str", "String")
+ .with_argument("start_pos", "Character position to start the substring at. The first character in the string has a position of 1.")
+ .with_argument("length", "Number of characters to extract. If not specified, returns the rest of the string after the start position.")
+ .build()
+ .unwrap()
+ })
}
/// Extracts the substring of string starting at the start'th character, and extending for count characters if that is specified. (Same as substring(string from start for count).)
diff --git a/datafusion/functions/src/unicode/substrindex.rs b/datafusion/functions/src/unicode/substrindex.rs
index 6591ee2640..436d554a49 100644
--- a/datafusion/functions/src/unicode/substrindex.rs
+++ b/datafusion/functions/src/unicode/substrindex.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::{
ArrayAccessor, ArrayIter, ArrayRef, ArrowPrimitiveType, AsArray,
OffsetSizeTrait,
@@ -24,11 +24,13 @@ use arrow::array::{
};
use arrow::datatypes::{DataType, Int32Type, Int64Type};
+use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::{exec_err, Result};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::Exact;
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
-
-use crate::utils::{make_scalar_function, utf8_to_str_type};
+use datafusion_expr::{
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
#[derive(Debug)]
pub struct SubstrIndexFunc {
@@ -83,6 +85,42 @@ impl ScalarUDFImpl for SubstrIndexFunc {
fn aliases(&self) -> &[String] {
&self.aliases
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_substr_index_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_substr_index_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description(r#"Returns the substring from str before count occurrences of the delimiter delim.
+If count is positive, everything to the left of the final delimiter (counting from the left) is returned.
+If count is negative, everything to the right of the final delimiter (counting from the right) is returned."#)
+ .with_syntax_example("substr_index(str, delim, count)")
+ .with_sql_example(r#"```sql
+> select substr_index('www.apache.org', '.', 1);
++---------------------------------------------------------+
+| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(1)) |
++---------------------------------------------------------+
+| www |
++---------------------------------------------------------+
+> select substr_index('www.apache.org', '.', -1);
++----------------------------------------------------------+
+| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(-1)) |
++----------------------------------------------------------+
+| org |
++----------------------------------------------------------+
+```"#)
+ .with_standard_argument("str", "String")
+ .with_argument("delim", "The string to find in str to split str.")
+ .with_argument("count", "The number of times to search for the delimiter. Can be either a positive or negative number.")
+ .build()
+ .unwrap()
+ })
}
/// Returns the substring from str before count occurrences of the delimiter delim. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned.
diff --git a/datafusion/functions/src/unicode/translate.rs b/datafusion/functions/src/unicode/translate.rs
index a42b9c6cb8..cbee9a6fe1 100644
--- a/datafusion/functions/src/unicode/translate.rs
+++ b/datafusion/functions/src/unicode/translate.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
use arrow::array::{
ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray,
OffsetSizeTrait,
@@ -27,8 +27,11 @@ use unicode_segmentation::UnicodeSegmentation;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::{exec_err, Result};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::TypeSignature::Exact;
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::{
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
#[derive(Debug)]
pub struct TranslateFunc {
@@ -76,6 +79,34 @@ impl ScalarUDFImpl for TranslateFunc {
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
make_scalar_function(invoke_translate, vec![])(args)
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_translate_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_translate_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_STRING)
+ .with_description("Translates characters in a string to specified translation characters.")
+ .with_syntax_example("translate(str, chars, translation)")
+ .with_sql_example(r#"```sql
+> select translate('twice', 'wic', 'her');
++--------------------------------------------------+
+| translate(Utf8("twice"),Utf8("wic"),Utf8("her")) |
++--------------------------------------------------+
+| there |
++--------------------------------------------------+
+```"#)
+ .with_standard_argument("str", "String")
+ .with_argument("chars", "Characters to translate.")
+ .with_argument("translation", "Translation characters. Translation characters replace only characters at the same position in the **chars** string.")
+ .build()
+ .unwrap()
+ })
}
fn invoke_translate(args: &[ArrayRef]) -> Result<ArrayRef> {
diff --git a/docs/source/user-guide/sql/aggregate_functions_new.md b/docs/source/user-guide/sql/aggregate_functions_new.md
index 213894d7da..236ef57da5 100644
--- a/docs/source/user-guide/sql/aggregate_functions_new.md
+++ b/docs/source/user-guide/sql/aggregate_functions_new.md
@@ -90,8 +90,9 @@ var(expression)
- **expression**: Numeric expression to operate on. Can be a constant, column,
or function, and any combination of operators.
-#### Aliases- var_sample
+#### Aliases
+- var_sample
- var_samp
### `var_pop`
@@ -106,50 +107,18 @@ var_pop(expression)
- **expression**: Numeric expression to operate on. Can be a constant, column,
or function, and any combination of operators.
-#### Aliases- var_population
+#### Aliases
-### `var_pop`
-
-Returns the statistical population variance of a set of numbers.
-
-```
-var_pop(expression)
-```
-
-#### Arguments
-
-- **expression**: Numeric expression to operate on. Can be a constant, column,
or function, and any combination of operators.
-
-#### Aliases- var_population
-
-### `var`
-
-Returns the statistical sample variance of a set of numbers.
-
-```
-var(expression)
-```
-
-#### Arguments
-
-- **expression**: Numeric expression to operate on. Can be a constant, column,
or function, and any combination of operators.
-
-#### Aliases- var_sample
-
-- var_samp
+- var_population
-### `var`
+### `var_population`
-Returns the statistical sample variance of a set of numbers.
+_Alias of [var_pop](#var_pop)._
-```
-var(expression)
-```
+### `var_samp`
-#### Arguments
-
-- **expression**: Numeric expression to operate on. Can be a constant, column,
or function, and any combination of operators.
+_Alias of [var](#var)._
-#### Aliases- var_sample
+### `var_sample`
-- var_samp
+_Alias of [var](#var)._
diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md
index 469fb705b7..3e481db90c 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -644,618 +644,7 @@ _Alias of [nvl](#nvl)._
## String Functions
-- [ascii](#ascii)
-- [bit_length](#bit_length)
-- [btrim](#btrim)
-- [char_length](#char_length)
-- [character_length](#character_length)
-- [concat](#concat)
-- [concat_ws](#concat_ws)
-- [chr](#chr)
-- [ends_with](#ends_with)
-- [initcap](#initcap)
-- [instr](#instr)
-- [left](#left)
-- [length](#length)
-- [lower](#lower)
-- [lpad](#lpad)
-- [ltrim](#ltrim)
-- [octet_length](#octet_length)
-- [repeat](#repeat)
-- [replace](#replace)
-- [reverse](#reverse)
-- [right](#right)
-- [rpad](#rpad)
-- [rtrim](#rtrim)
-- [split_part](#split_part)
-- [starts_with](#starts_with)
-- [strpos](#strpos)
-- [substr](#substr)
-- [to_hex](#to_hex)
-- [translate](#translate)
-- [trim](#trim)
-- [upper](#upper)
-- [uuid](#uuid)
-- [overlay](#overlay)
-- [levenshtein](#levenshtein)
-- [substr_index](#substr_index)
-- [find_in_set](#find_in_set)
-- [position](#position)
-- [contains](#contains)
-
-### `ascii`
-
-Returns the ASCII value of the first character in a string.
-
-```
-ascii(str)
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-
-**Related functions**:
-[chr](#chr)
-
-### `bit_length`
-
-Returns the bit length of a string.
-
-```
-bit_length(str)
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-
-**Related functions**:
-[length](#length),
-[octet_length](#octet_length)
-
-### `btrim`
-
-Trims the specified trim string from the start and end of a string.
-If no trim string is provided, all whitespace is removed from the start and end
-of the input string.
-
-```
-btrim(str[, trim_str])
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-- **trim_str**: String expression to trim from the beginning and end of the
input string.
- Can be a constant, column, or function, and any combination of arithmetic
operators.
- _Default is whitespace characters._
-
-**Related functions**:
-[ltrim](#ltrim),
-[rtrim](#rtrim)
-
-#### Aliases
-
-- trim
-
-### `char_length`
-
-_Alias of [length](#length)._
-
-### `character_length`
-
-_Alias of [length](#length)._
-
-### `concat`
-
-Concatenates multiple strings together.
-
-```
-concat(str[, ..., str_n])
-```
-
-#### Arguments
-
-- **str**: String expression to concatenate.
- Can be a constant, column, or function, and any combination of string
operators.
-- **str_n**: Subsequent string column or literal string to concatenate.
-
-**Related functions**:
-[concat_ws](#concat_ws)
-
-### `concat_ws`
-
-Concatenates multiple strings together with a specified separator.
-
-```
-concat_ws(separator, str[, ..., str_n])
-```
-
-#### Arguments
-
-- **separator**: Separator to insert between concatenated strings.
-- **str**: String expression to concatenate.
- Can be a constant, column, or function, and any combination of string
operators.
-- **str_n**: Subsequent string column or literal string to concatenate.
-
-**Related functions**:
-[concat](#concat)
-
-### `chr`
-
-Returns the character with the specified ASCII or Unicode code value.
-
-```
-chr(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression containing the ASCII or Unicode code value to
operate on.
- Can be a constant, column, or function, and any combination of arithmetic or
- string operators.
-
-**Related functions**:
-[ascii](#ascii)
-
-### `ends_with`
-
-Tests if a string ends with a substring.
-
-```
-ends_with(str, substr)
-```
-
-#### Arguments
-
-- **str**: String expression to test.
- Can be a constant, column, or function, and any combination of string
operators.
-- **substr**: Substring to test for.
-
-### `initcap`
-
-Capitalizes the first character in each word in the input string.
-Words are delimited by non-alphanumeric characters.
-
-```
-initcap(str)
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-
-**Related functions**:
-[lower](#lower),
-[upper](#upper)
-
-### `instr`
-
-_Alias of [strpos](#strpos)._
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-- **substr**: Substring expression to search for.
- Can be a constant, column, or function, and any combination of string
operators.
-
-### `left`
-
-Returns a specified number of characters from the left side of a string.
-
-```
-left(str, n)
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-- **n**: Number of characters to return.
-
-**Related functions**:
-[right](#right)
-
-### `length`
-
-Returns the number of characters in a string.
-
-```
-length(str)
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-
-#### Aliases
-
-- char_length
-- character_length
-
-**Related functions**:
-[bit_length](#bit_length),
-[octet_length](#octet_length)
-
-### `lower`
-
-Converts a string to lower-case.
-
-```
-lower(str)
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-
-**Related functions**:
-[initcap](#initcap),
-[upper](#upper)
-
-### `lpad`
-
-Pads the left side of a string with another string to a specified string
length.
-
-```
-lpad(str, n[, padding_str])
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-- **n**: String length to pad to.
-- **padding_str**: String expression to pad with.
- Can be a constant, column, or function, and any combination of string
operators.
- _Default is a space._
-
-**Related functions**:
-[rpad](#rpad)
-
-### `ltrim`
-
-Trims the specified trim string from the beginning of a string.
-If no trim string is provided, all whitespace is removed from the start
-of the input string.
-
-```
-ltrim(str[, trim_str])
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-- **trim_str**: String expression to trim from the beginning of the input
string.
- Can be a constant, column, or function, and any combination of arithmetic
operators.
- _Default is whitespace characters._
-
-**Related functions**:
-[btrim](#btrim),
-[rtrim](#rtrim)
-
-### `octet_length`
-
-Returns the length of a string in bytes.
-
-```
-octet_length(str)
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-
-**Related functions**:
-[bit_length](#bit_length),
-[length](#length)
-
-### `repeat`
-
-Returns a string with an input string repeated a specified number.
-
-```
-repeat(str, n)
-```
-
-#### Arguments
-
-- **str**: String expression to repeat.
- Can be a constant, column, or function, and any combination of string
operators.
-- **n**: Number of times to repeat the input string.
-
-### `replace`
-
-Replaces all occurrences of a specified substring in a string with a new
substring.
-
-```
-replace(str, substr, replacement)
-```
-
-#### Arguments
-
-- **str**: String expression to repeat.
- Can be a constant, column, or function, and any combination of string
operators.
-- **substr**: Substring expression to replace in the input string.
- Can be a constant, column, or function, and any combination of string
operators.
-- **replacement**: Replacement substring expression.
- Can be a constant, column, or function, and any combination of string
operators.
-
-### `reverse`
-
-Reverses the character order of a string.
-
-```
-reverse(str)
-```
-
-#### Arguments
-
-- **str**: String expression to repeat.
- Can be a constant, column, or function, and any combination of string
operators.
-
-### `right`
-
-Returns a specified number of characters from the right side of a string.
-
-```
-right(str, n)
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-- **n**: Number of characters to return.
-
-**Related functions**:
-[left](#left)
-
-### `rpad`
-
-Pads the right side of a string with another string to a specified string
length.
-
-```
-rpad(str, n[, padding_str])
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-- **n**: String length to pad to.
-- **padding_str**: String expression to pad with.
- Can be a constant, column, or function, and any combination of string
operators.
- _Default is a space._
-
-**Related functions**:
-[lpad](#lpad)
-
-### `rtrim`
-
-Trims the specified trim string from the end of a string.
-If no trim string is provided, all whitespace is removed from the end
-of the input string.
-
-```
-rtrim(str[, trim_str])
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-- **trim_str**: String expression to trim from the end of the input string.
- Can be a constant, column, or function, and any combination of arithmetic
operators.
- _Default is whitespace characters._
-
-**Related functions**:
-[btrim](#btrim),
-[ltrim](#ltrim)
-
-### `split_part`
-
-Splits a string based on a specified delimiter and returns the substring in the
-specified position.
-
-```
-split_part(str, delimiter, pos)
-```
-
-#### Arguments
-
-- **str**: String expression to spit.
- Can be a constant, column, or function, and any combination of string
operators.
-- **delimiter**: String or character to split on.
-- **pos**: Position of the part to return.
-
-### `starts_with`
-
-Tests if a string starts with a substring.
-
-```
-starts_with(str, substr)
-```
-
-#### Arguments
-
-- **str**: String expression to test.
- Can be a constant, column, or function, and any combination of string
operators.
-- **substr**: Substring to test for.
-
-### `strpos`
-
-Returns the starting position of a specified substring in a string.
-Positions begin at 1.
-If the substring does not exist in the string, the function returns 0.
-
-```
-strpos(str, substr)
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-- **substr**: Substring expression to search for.
- Can be a constant, column, or function, and any combination of string
operators.
-
-#### Aliases
-
-- instr
-
-### `substr`
-
-Extracts a substring of a specified number of characters from a specific
-starting position in a string.
-
-```
-substr(str, start_pos[, length])
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-- **start_pos**: Character position to start the substring at.
- The first character in the string has a position of 1.
-- **length**: Number of characters to extract.
- If not specified, returns the rest of the string after the start position.
-
-#### Aliases
-
-- substring
-
-### `substring`
-
-_Alias of [substr](#substr)._
-
-### `translate`
-
-Translates characters in a string to specified translation characters.
-
-```
-translate(str, chars, translation)
-```
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-- **chars**: Characters to translate.
-- **translation**: Translation characters. Translation characters replace only
- characters at the same position in the **chars** string.
-
-### `to_hex`
-
-Converts an integer to a hexadecimal string.
-
-```
-to_hex(int)
-```
-
-#### Arguments
-
-- **int**: Integer expression to convert.
- Can be a constant, column, or function, and any combination of arithmetic
operators.
-
-### `trim`
-
-_Alias of [btrim](#btrim)._
-
-### `upper`
-
-Converts a string to upper-case.
-
-```
-upper(str)
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-
-**Related functions**:
-[initcap](#initcap),
-[lower](#lower)
-
-### `uuid`
-
-Returns UUID v4 string value which is unique per row.
-
-```
-uuid()
-```
-
-### `overlay`
-
-Returns the string which is replaced by another string from the specified
position and specified count length.
-For example, `overlay('Txxxxas' placing 'hom' from 2 for 4) → Thomas`
-
-```
-overlay(str PLACING substr FROM pos [FOR count])
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
-- **substr**: the string to replace part of str.
-- **pos**: the start position to replace of str.
-- **count**: the count of characters to be replaced from start position of
str. If not specified, will use substr length instead.
-
-### `levenshtein`
-
-Returns the Levenshtein distance between the two given strings.
-For example, `levenshtein('kitten', 'sitting') = 3`
-
-```
-levenshtein(str1, str2)
-```
-
-#### Arguments
-
-- **str1**: String expression to compute Levenshtein distance with str2.
-- **str2**: String expression to compute Levenshtein distance with str1.
-
-### `substr_index`
-
-Returns the substring from str before count occurrences of the delimiter delim.
-If count is positive, everything to the left of the final delimiter (counting
from the left) is returned.
-If count is negative, everything to the right of the final delimiter (counting
from the right) is returned.
-For example, `substr_index('www.apache.org', '.', 1) = www`,
`substr_index('www.apache.org', '.', -1) = org`
-
-```
-substr_index(str, delim, count)
-```
-
-#### Arguments
-
-- **str**: String expression to operate on.
-- **delim**: the string to find in str to split str.
-- **count**: The number of times to search for the delimiter. Can be both a
positive or negative number.
-
-### `find_in_set`
-
-Returns a value in the range of 1 to N if the string str is in the string list
strlist consisting of N substrings.
-For example, `find_in_set('b', 'a,b,c,d') = 2`
-
-```
-find_in_set(str, strlist)
-```
-
-#### Arguments
-
-- **str**: String expression to find in strlist.
-- **strlist**: A string list is a string composed of substrings separated by ,
characters.
+See the new documentation [`here`](https://datafusion.apache.org/user-guide/sql/scalar_functions_new.html)
## Binary String Functions
@@ -1452,19 +841,6 @@ position(substr in origstr)
- **substr**: The pattern string.
- **origstr**: The model string.
-### `contains`
-
-Return true if search_string is found within string (case-sensitive).
-
-```
-contains(string, search_string)
-```
-
-#### Arguments
-
-- **string**: The pattern string.
-- **search_string**: The model string.
-
## Time and Date Functions
- [now](#now)
diff --git a/docs/source/user-guide/sql/scalar_functions_new.md b/docs/source/user-guide/sql/scalar_functions_new.md
index bff2c0f485..2423f9c475 100644
--- a/docs/source/user-guide/sql/scalar_functions_new.md
+++ b/docs/source/user-guide/sql/scalar_functions_new.md
@@ -66,11 +66,48 @@ coalesce(expression1[, ..., expression_n])
## String Functions
- [ascii](#ascii)
+- [bit_length](#bit_length)
+- [btrim](#btrim)
+- [char_length](#char_length)
+- [character_length](#character_length)
+- [chr](#chr)
+- [concat](#concat)
+- [concat_ws](#concat_ws)
+- [contains](#contains)
+- [ends_with](#ends_with)
+- [find_in_set](#find_in_set)
+- [initcap](#initcap)
+- [instr](#instr)
+- [left](#left)
+- [length](#length)
+- [levenshtein](#levenshtein)
+- [lower](#lower)
+- [lpad](#lpad)
+- [ltrim](#ltrim)
+- [octet_length](#octet_length)
+- [position](#position)
+- [repeat](#repeat)
+- [replace](#replace)
+- [reverse](#reverse)
+- [right](#right)
- [rpad](#rpad)
+- [rtrim](#rtrim)
+- [split_part](#split_part)
+- [starts_with](#starts_with)
+- [strpos](#strpos)
+- [substr](#substr)
+- [substr_index](#substr_index)
+- [substring](#substring)
+- [substring_index](#substring_index)
+- [to_hex](#to_hex)
+- [translate](#translate)
+- [trim](#trim)
+- [upper](#upper)
+- [uuid](#uuid)
### `ascii`
-Returns the ASCII value of the first character in a string.
+Returns the Unicode character code of the first character in a string.
```
ascii(str)
@@ -78,12 +115,601 @@ ascii(str)
#### Arguments
-- **str**: String expression to operate on. Can be a constant, column, or
function that evaluates to or can be coerced to a Utf8, LargeUtf8 or a Utf8View.
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+
+#### Example
+
+```sql
+> select ascii('abc');
++--------------------+
+| ascii(Utf8("abc")) |
++--------------------+
+| 97 |
++--------------------+
+> select ascii('🚀');
++-------------------+
+| ascii(Utf8("🚀")) |
++-------------------+
+| 128640 |
++-------------------+
+```
**Related functions**:
- [chr](#chr)
+### `bit_length`
+
+Returns the bit length of a string.
+
+```
+bit_length(str)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+
+#### Example
+
+```sql
+> select bit_length('datafusion');
++--------------------------------+
+| bit_length(Utf8("datafusion")) |
++--------------------------------+
+| 80 |
++--------------------------------+
+```
+
+**Related functions**:
+
+- [length](#length)
+- [octet_length](#octet_length)
+
+### `btrim`
+
+Trims the specified trim string from the start and end of a string. If no trim
string is provided, all whitespace is removed from the start and end of the
input string.
+
+```
+btrim(str[, trim_str])
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **trim_str**: String expression to trim from the start and end of the input
string. Can be a constant, column, or function, and any combination of
operators. _Default is whitespace characters._
+
+#### Example
+
+```sql
+> select btrim('__datafusion____', '_');
++-------------------------------------------+
+| btrim(Utf8("__datafusion____"),Utf8("_")) |
++-------------------------------------------+
+| datafusion |
++-------------------------------------------+
+```
+
+#### Aliases
+
+- trim
+
+**Related functions**:
+
+- [ltrim](#ltrim)
+- [rtrim](#rtrim)
+
+### `char_length`
+
+_Alias of [character_length](#character_length)._
+
+### `character_length`
+
+Returns the number of characters in a string.
+
+```
+character_length(str)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+
+#### Example
+
+```sql
+> select character_length('Ångström');
++------------------------------------+
+| character_length(Utf8("Ångström")) |
++------------------------------------+
+| 8 |
++------------------------------------+
+```
+
+#### Aliases
+
+- length
+- char_length
+
+**Related functions**:
+
+- [bit_length](#bit_length)
+- [octet_length](#octet_length)
+
+### `chr`
+
+Returns the character with the specified ASCII or Unicode code value.
+
+```
+chr(expression)
+```
+
+#### Arguments
+
+- **expression**: Integer expression containing the ASCII or Unicode code
value to operate on. Can be a constant, column, or function, and any
combination of operators.
+
+#### Example
+
+```sql
+> select chr(128640);
++--------------------+
+| chr(Int64(128640)) |
++--------------------+
+| 🚀 |
++--------------------+
+```
+
+**Related functions**:
+
+- [ascii](#ascii)
+
+### `concat`
+
+Concatenates multiple strings together.
+
+```
+concat(str[, ..., str_n])
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **str_n**: Subsequent string expressions to concatenate.
+
+#### Example
+
+```sql
+> select concat('data', 'f', 'us', 'ion');
++-------------------------------------------------------+
+| concat(Utf8("data"),Utf8("f"),Utf8("us"),Utf8("ion")) |
++-------------------------------------------------------+
+| datafusion |
++-------------------------------------------------------+
+```
+
+**Related functions**:
+
+- [concat_ws](#concat_ws)
+
+### `concat_ws`
+
+Concatenates multiple strings together with a specified separator.
+
+```
+concat_ws(separator, str[, ..., str_n])
+```
+
+#### Arguments
+
+- **separator**: Separator to insert between concatenated strings.
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **str_n**: Subsequent string expressions to concatenate. Can be a constant,
column, or function, and any combination of operators.
+
+#### Example
+
+```sql
+> select concat_ws('_', 'data', 'fusion');
++--------------------------------------------------+
+| concat_ws(Utf8("_"),Utf8("data"),Utf8("fusion")) |
++--------------------------------------------------+
+| data_fusion |
++--------------------------------------------------+
+```
+
+**Related functions**:
+
+- [concat](#concat)
+
+### `contains`
+
+Returns true if search_str is found within str (case-sensitive).
+
+```
+contains(str, search_str)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **search_str**: The string to search for in str.
+
+#### Example
+
+```sql
+> select contains('the quick brown fox', 'row');
++---------------------------------------------------+
+| contains(Utf8("the quick brown fox"),Utf8("row")) |
++---------------------------------------------------+
+| true |
++---------------------------------------------------+
+```
+
+### `ends_with`
+
+Tests if a string ends with a substring.
+
+```
+ends_with(str, substr)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **substr**: Substring to test for.
+
+#### Example
+
+```sql
+> select ends_with('datafusion', 'soin');
++--------------------------------------------+
+| ends_with(Utf8("datafusion"),Utf8("soin")) |
++--------------------------------------------+
+| false |
++--------------------------------------------+
+> select ends_with('datafusion', 'sion');
++--------------------------------------------+
+| ends_with(Utf8("datafusion"),Utf8("sion")) |
++--------------------------------------------+
+| true |
++--------------------------------------------+
+```
+
+### `find_in_set`
+
+Returns a value in the range of 1 to N if the string str is in the string list
strlist consisting of N substrings.
+
+```
+find_in_set(str, strlist)
+```
+
+#### Arguments
+
+- **str**: String expression to find in strlist.
+- **strlist**: String list to search: a string composed of substrings
separated by comma (`,`) characters.
+
+#### Example
+
+```sql
+> select find_in_set('b', 'a,b,c,d');
++----------------------------------------+
+| find_in_set(Utf8("b"),Utf8("a,b,c,d")) |
++----------------------------------------+
+| 2 |
++----------------------------------------+
+```
+
+### `initcap`
+
+Capitalizes the first character in each word in the input string. Words are
delimited by non-alphanumeric characters.
+
+```
+initcap(str)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+
+#### Example
+
+```sql
+> select initcap('apache datafusion');
++------------------------------------+
+| initcap(Utf8("apache datafusion")) |
++------------------------------------+
+| Apache Datafusion |
++------------------------------------+
+```
+
+**Related functions**:
+
+- [lower](#lower)
+- [upper](#upper)
+
+### `instr`
+
+_Alias of [strpos](#strpos)._
+
+### `left`
+
+Returns a specified number of characters from the left side of a string.
+
+```
+left(str, n)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **n**: Number of characters to return.
+
+#### Example
+
+```sql
+> select left('datafusion', 4);
++-----------------------------------+
+| left(Utf8("datafusion"),Int64(4)) |
++-----------------------------------+
+| data |
++-----------------------------------+
+```
+
+**Related functions**:
+
+- [right](#right)
+
+### `length`
+
+_Alias of [character_length](#character_length)._
+
+### `levenshtein`
+
+Returns the [`Levenshtein
distance`](https://en.wikipedia.org/wiki/Levenshtein_distance) between the two
given strings.
+
+```
+levenshtein(str1, str2)
+```
+
+#### Arguments
+
+- **str1**: String expression to compute Levenshtein distance with str2.
+- **str2**: String expression to compute Levenshtein distance with str1.
+
+#### Example
+
+```sql
+> select levenshtein('kitten', 'sitting');
++---------------------------------------------+
+| levenshtein(Utf8("kitten"),Utf8("sitting")) |
++---------------------------------------------+
+| 3 |
++---------------------------------------------+
+```
+
+### `lower`
+
+Converts a string to lower-case.
+
+```
+lower(str)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+
+#### Example
+
+```sql
+> select lower('Ångström');
++-------------------------+
+| lower(Utf8("Ångström")) |
++-------------------------+
+| ångström |
++-------------------------+
+```
+
+**Related functions**:
+
+- [initcap](#initcap)
+- [upper](#upper)
+
+### `lpad`
+
+Pads the left side of a string with another string to a specified string
length.
+
+```
+lpad(str, n[, padding_str])
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **n**: String length to pad to.
+- **padding_str**: Optional string expression to pad with. Can be a constant,
column, or function, and any combination of string operators. _Default is a
space._
+
+#### Example
+
+```sql
+> select lpad('Dolly', 10, 'hello');
++---------------------------------------------+
+| lpad(Utf8("Dolly"),Int64(10),Utf8("hello")) |
++---------------------------------------------+
+| helloDolly |
++---------------------------------------------+
+```
+
+**Related functions**:
+
+- [rpad](#rpad)
+
+### `ltrim`
+
+Trims the specified trim string from the beginning of a string. If no trim
string is provided, all whitespace is removed from the start of the input
string.
+
+```
+ltrim(str[, trim_str])
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **trim_str**: String expression to trim from the beginning of the input
string. Can be a constant, column, or function, and any combination of
operators. _Default is whitespace characters._
+
+#### Example
+
+```sql
+> select ltrim(' datafusion ');
++-------------------------------+
+| ltrim(Utf8(" datafusion ")) |
++-------------------------------+
+| datafusion |
++-------------------------------+
+> select ltrim('___datafusion___', '_');
++-------------------------------------------+
+| ltrim(Utf8("___datafusion___"),Utf8("_")) |
++-------------------------------------------+
+| datafusion___ |
++-------------------------------------------+
+```
+
+**Related functions**:
+
+- [btrim](#btrim)
+- [rtrim](#rtrim)
+
+### `octet_length`
+
+Returns the length of a string in bytes.
+
+```
+octet_length(str)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+
+#### Example
+
+```sql
+> select octet_length('Ångström');
++--------------------------------+
+| octet_length(Utf8("Ångström")) |
++--------------------------------+
+| 10 |
++--------------------------------+
+```
+
+**Related functions**:
+
+- [bit_length](#bit_length)
+- [length](#length)
+
+### `position`
+
+_Alias of [strpos](#strpos)._
+
+### `repeat`
+
+Returns a string with an input string repeated a specified number of times.
+
+```
+repeat(str, n)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **n**: Number of times to repeat the input string.
+
+#### Example
+
+```sql
+> select repeat('data', 3);
++-------------------------------+
+| repeat(Utf8("data"),Int64(3)) |
++-------------------------------+
+| datadatadata |
++-------------------------------+
+```
+
+### `replace`
+
+Replaces all occurrences of a specified substring in a string with a new
substring.
+
+```
+replace(str, substr, replacement)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **substr**: Substring expression to replace in the input string. Can be a
constant, column, or function, and any combination of operators.
+- **replacement**: Replacement substring expression to operate on. Can be a
constant, column, or function, and any combination of operators.
+
+#### Example
+
+```sql
+> select replace('ABabbaBA', 'ab', 'cd');
++-------------------------------------------------+
+| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) |
++-------------------------------------------------+
+| ABcdbaBA |
++-------------------------------------------------+
+```
+
+### `reverse`
+
+Reverses the character order of a string.
+
+```
+reverse(str)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+
+#### Example
+
+```sql
+> select reverse('datafusion');
++-----------------------------+
+| reverse(Utf8("datafusion")) |
++-----------------------------+
+| noisufatad |
++-----------------------------+
+```
+
+### `right`
+
+Returns a specified number of characters from the right side of a string.
+
+```
+right(str, n)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **n**: Number of characters to return.
+
+#### Example
+
+```sql
+> select right('datafusion', 6);
++------------------------------------+
+| right(Utf8("datafusion"),Int64(6)) |
++------------------------------------+
+| fusion |
++------------------------------------+
+```
+
+**Related functions**:
+
+- [left](#left)
+
### `rpad`
Pads the right side of a string with another string to a specified string
length.
@@ -98,10 +724,307 @@ rpad(str, n[, padding_str])
- **n**: String length to pad to.
- **padding_str**: String expression to pad with. Can be a constant, column,
or function, and any combination of string operators. _Default is a space._
+#### Example
+
+```sql
+> select rpad('datafusion', 20, '_-');
++-----------------------------------------------+
+| rpad(Utf8("datafusion"),Int64(20),Utf8("_-")) |
++-----------------------------------------------+
+| datafusion_-_-_-_-_- |
++-----------------------------------------------+
+```
+
**Related functions**:
- [lpad](#lpad)
+### `rtrim`
+
+Trims the specified trim string from the end of a string. If no trim string is
provided, all whitespace is removed from the end of the input string.
+
+```
+rtrim(str[, trim_str])
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **trim_str**: String expression to trim from the end of the input string.
Can be a constant, column, or function, and any combination of operators.
_Default is whitespace characters._
+
+#### Example
+
+```sql
+> select rtrim(' datafusion ');
++-------------------------------+
+| rtrim(Utf8(" datafusion ")) |
++-------------------------------+
+| datafusion |
++-------------------------------+
+> select rtrim('___datafusion___', '_');
++-------------------------------------------+
+| rtrim(Utf8("___datafusion___"),Utf8("_")) |
++-------------------------------------------+
+| ___datafusion |
++-------------------------------------------+
+```
+
+**Related functions**:
+
+- [btrim](#btrim)
+- [ltrim](#ltrim)
+
+### `split_part`
+
+Splits a string based on a specified delimiter and returns the substring in
the specified position.
+
+```
+split_part(str, delimiter, pos)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **delimiter**: String or character to split on.
+- **pos**: Position of the part to return.
+
+#### Example
+
+```sql
+> select split_part('1.2.3.4.5', '.', 3);
++--------------------------------------------------+
+| split_part(Utf8("1.2.3.4.5"),Utf8("."),Int64(3)) |
++--------------------------------------------------+
+| 3 |
++--------------------------------------------------+
+```
+
+### `starts_with`
+
+Tests if a string starts with a substring.
+
+```
+starts_with(str, substr)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **substr**: Substring to test for.
+
+#### Example
+
+```sql
+> select starts_with('datafusion','data');
++----------------------------------------------+
+| starts_with(Utf8("datafusion"),Utf8("data")) |
++----------------------------------------------+
+| true |
++----------------------------------------------+
+```
+
+### `strpos`
+
+Returns the starting position of a specified substring in a string. Positions
begin at 1. If the substring does not exist in the string, the function returns
0.
+
+```
+strpos(str, substr)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **substr**: Substring expression to search for.
+
+#### Example
+
+```sql
+> select strpos('datafusion', 'fus');
++----------------------------------------+
+| strpos(Utf8("datafusion"),Utf8("fus")) |
++----------------------------------------+
+| 5 |
++----------------------------------------+
+```
+
+#### Aliases
+
+- instr
+- position
+
+### `substr`
+
+Extracts a substring of a specified number of characters from a specific
starting position in a string.
+
+```
+substr(str, start_pos[, length])
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **start_pos**: Character position to start the substring at. The first
character in the string has a position of 1.
+- **length**: Number of characters to extract. If not specified, returns the
rest of the string after the start position.
+
+#### Example
+
+```sql
+> select substr('datafusion', 5, 3);
++----------------------------------------------+
+| substr(Utf8("datafusion"),Int64(5),Int64(3)) |
++----------------------------------------------+
+| fus |
++----------------------------------------------+
+```
+
+#### Aliases
+
+- substring
+
+### `substr_index`
+
+Returns the substring from str before count occurrences of the delimiter delim.
+If count is positive, everything to the left of the final delimiter (counting
from the left) is returned.
+If count is negative, everything to the right of the final delimiter (counting
from the right) is returned.
+
+```
+substr_index(str, delim, count)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **delim**: The string to find in str to split str.
+- **count**: The number of times to search for the delimiter. Can be either a
positive or negative number.
+
+#### Example
+
+```sql
+> select substr_index('www.apache.org', '.', 1);
++---------------------------------------------------------+
+| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(1)) |
++---------------------------------------------------------+
+| www |
++---------------------------------------------------------+
+> select substr_index('www.apache.org', '.', -1);
++----------------------------------------------------------+
+| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(-1)) |
++----------------------------------------------------------+
+| org |
++----------------------------------------------------------+
+```
+
+#### Aliases
+
+- substring_index
+
+### `substring`
+
+_Alias of [substr](#substr)._
+
+### `substring_index`
+
+_Alias of [substr_index](#substr_index)._
+
+### `to_hex`
+
+Converts an integer to a hexadecimal string.
+
+```
+to_hex(int)
+```
+
+#### Arguments
+
+- **int**: Integer expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+
+#### Example
+
+```sql
+> select to_hex(12345689);
++-------------------------+
+| to_hex(Int64(12345689)) |
++-------------------------+
+| bc6159 |
++-------------------------+
+```
+
+### `translate`
+
+Translates characters in a string to specified translation characters.
+
+```
+translate(str, chars, translation)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **chars**: Characters to translate.
+- **translation**: Translation characters. Translation characters replace only
characters at the same position in the **chars** string.
+
+#### Example
+
+```sql
+> select translate('twice', 'wic', 'her');
++--------------------------------------------------+
+| translate(Utf8("twice"),Utf8("wic"),Utf8("her")) |
++--------------------------------------------------+
+| there |
++--------------------------------------------------+
+```
+
+### `trim`
+
+_Alias of [btrim](#btrim)._
+
+### `upper`
+
+Converts a string to upper-case.
+
+```
+upper(str)
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+
+#### Example
+
+```sql
+> select upper('dataFusion');
++---------------------------+
+| upper(Utf8("dataFusion")) |
++---------------------------+
+| DATAFUSION |
++---------------------------+
+```
+
+**Related functions**:
+
+- [initcap](#initcap)
+- [lower](#lower)
+
+### `uuid`
+
+Returns a [`UUID
v4`](<https://en.wikipedia.org/wiki/Universally_unique_identifier#Version_4_(random)>)
string value which is unique per row.
+
+```
+uuid()
+```
+
+#### Example
+
+```sql
+> select uuid();
++--------------------------------------+
+| uuid() |
++--------------------------------------+
+| 6ec17ef8-1934-41cc-8d59-d0c8f9eea1f0 |
++--------------------------------------+
+```
+
## Binary String Functions
- [decode](#decode)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]