This is an automated email from the ASF dual-hosted git repository.
comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 2b4e2003e1 doc-gen: migrate scalar functions (array) documentation 3/3
(#13930)
2b4e2003e1 is described below
commit 2b4e2003e15d372b7e6aa414e768c44cab5b8e2e
Author: Ian Lai <[email protected]>
AuthorDate: Mon Dec 30 03:25:33 2024 +0800
doc-gen: migrate scalar functions (array) documentation 3/3 (#13930)
* doc-gen: migrate scalar functions (array) documentation 3/3
* fix: import doc and macro, fix typo and update function docs
---------
Co-authored-by: Cheng-Yuan-Lai <a186235@g,ail.com>
---
datafusion/functions-nested/src/range.rs | 144 +++++++++------------
datafusion/functions-nested/src/remove.rs | 167 ++++++++++--------------
datafusion/functions-nested/src/repeat.rs | 71 +++++------
datafusion/functions-nested/src/replace.rs | 169 +++++++++----------------
datafusion/functions-nested/src/resize.rs | 63 ++++-----
datafusion/functions-nested/src/reverse.rs | 50 +++-----
datafusion/functions-nested/src/set_ops.rs | 162 ++++++++++--------------
datafusion/functions-nested/src/sort.rs | 66 ++++------
datafusion/functions-nested/src/string.rs | 133 ++++++++-----------
docs/source/user-guide/sql/scalar_functions.md | 123 ++++++++++--------
10 files changed, 486 insertions(+), 662 deletions(-)
diff --git a/datafusion/functions-nested/src/range.rs
b/datafusion/functions-nested/src/range.rs
index 8344c1a261..cf27c70c2b 100644
--- a/datafusion/functions-nested/src/range.rs
+++ b/datafusion/functions-nested/src/range.rs
@@ -37,16 +37,16 @@ use datafusion_common::cast::{
use datafusion_common::{
exec_datafusion_err, exec_err, internal_err, not_impl_datafusion_err,
Result,
};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
+use datafusion_macros::user_doc;
use itertools::Itertools;
use std::any::Any;
use std::cmp::Ordering;
use std::iter::from_fn;
use std::str::FromStr;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
make_udf_expr_and_func!(
Range,
@@ -55,6 +55,39 @@ make_udf_expr_and_func!(
"create a list of values in the range between start and stop",
range_udf
);
+
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Returns an Arrow array between start and stop with step.
The range start..end contains all values with start <= x < end. It is empty if
start >= end. Step cannot be 0.",
+ syntax_example = "range(start, stop, step)",
+ sql_example = r#"```sql
+> select range(2, 10, 3);
++-----------------------------------+
+| range(Int64(2),Int64(10),Int64(3))|
++-----------------------------------+
+| [2, 5, 8] |
++-----------------------------------+
+
+> select range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH);
++--------------------------------------------------------------+
+| range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH) |
++--------------------------------------------------------------+
+| [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01] |
++--------------------------------------------------------------+
+```"#,
+ argument(
+ name = "start",
+ description = "Start of the range. Ints, timestamps, dates or string
types that can be coerced to Date32 are supported."
+ ),
+ argument(
+ name = "end",
+ description = "End of the range (not included). Type must be the same
as start."
+ ),
+ argument(
+ name = "step",
+ description = "Increase by step (cannot be 0). Steps less than a day
are supported only for timestamp ranges."
+ )
+)]
#[derive(Debug)]
pub(super) struct Range {
signature: Signature,
@@ -141,52 +174,10 @@ impl ScalarUDFImpl for Range {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_range_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_range_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Returns an Arrow array between start and stop with step. The
range start..end contains all values with start <= x < end. It is empty if
start >= end. Step cannot be 0.",
-
- "range(start, stop, step)")
- .with_sql_example(
- r#"```sql
-> select range(2, 10, 3);
-+-----------------------------------+
-| range(Int64(2),Int64(10),Int64(3))|
-+-----------------------------------+
-| [2, 5, 8] |
-+-----------------------------------+
-
-> select range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH);
-+--------------------------------------------------------------+
-| range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH) |
-+--------------------------------------------------------------+
-| [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01] |
-+--------------------------------------------------------------+
-```"#,
- )
- .with_argument(
- "start",
- "Start of the range. Ints, timestamps, dates or string types
that can be coerced to Date32 are supported.",
- )
- .with_argument(
- "end",
- "End of the range (not included). Type must be the same as
start.",
- )
- .with_argument(
- "step",
- "Increase by step (cannot be 0). Steps less than a day are
supported only for timestamp ranges.",
- )
- .build()
- })
-}
-
make_udf_expr_and_func!(
GenSeries,
gen_series,
@@ -194,6 +185,32 @@ make_udf_expr_and_func!(
"create a list of values in the range between start and stop, include
upper bound",
gen_series_udf
);
+
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Similar to the range function, but it includes the upper
bound.",
+ syntax_example = "generate_series(start, stop, step)",
+ sql_example = r#"```sql
+> select generate_series(1,3);
++------------------------------------+
+| generate_series(Int64(1),Int64(3)) |
++------------------------------------+
+| [1, 2, 3] |
++------------------------------------+
+```"#,
+ argument(
+ name = "start",
+ description = "Start of the series. Ints, timestamps, dates or string
types that can be coerced to Date32 are supported."
+ ),
+ argument(
+ name = "end",
+ description = "End of the series (included). Type must be the same as
start."
+ ),
+ argument(
+ name = "step",
+ description = "Increase by step (cannot be 0). Steps less than a day
are supported only for timestamp ranges."
+ )
+)]
#[derive(Debug)]
pub(super) struct GenSeries {
signature: Signature,
@@ -283,45 +300,10 @@ impl ScalarUDFImpl for GenSeries {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_generate_series_doc())
+ self.doc()
}
}
-static GENERATE_SERIES_DOCUMENTATION: OnceLock<Documentation> =
OnceLock::new();
-
-fn get_generate_series_doc() -> &'static Documentation {
- GENERATE_SERIES_DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Similar to the range function, but it includes the upper
bound.",
-
- "generate_series(start, stop, step)")
- .with_sql_example(
- r#"```sql
-> select generate_series(1,3);
-+------------------------------------+
-| generate_series(Int64(1),Int64(3)) |
-+------------------------------------+
-| [1, 2, 3] |
-+------------------------------------+
-```"#,
- )
- .with_argument(
- "start",
- "start of the series. Ints, timestamps, dates or string types
that can be coerced to Date32 are supported.",
- )
- .with_argument(
- "end",
- "end of the series (included). Type must be the same as
start.",
- )
- .with_argument(
- "step",
- "increase by step (can not be 0). Steps less than a day are
supported only for timestamp ranges.",
- )
- .build()
- })
-}
-
/// Generates an array of integers from start to stop with a given step.
///
/// This function takes 1 to 3 ArrayRefs as arguments, representing start,
stop, and step values.
diff --git a/datafusion/functions-nested/src/remove.rs
b/datafusion/functions-nested/src/remove.rs
index e5521706be..b6031ce733 100644
--- a/datafusion/functions-nested/src/remove.rs
+++ b/datafusion/functions-nested/src/remove.rs
@@ -27,12 +27,12 @@ use arrow_buffer::OffsetBuffer;
use arrow_schema::{DataType, Field};
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
+use datafusion_macros::user_doc;
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
make_udf_expr_and_func!(
ArrayRemove,
@@ -42,6 +42,27 @@ make_udf_expr_and_func!(
array_remove_udf
);
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Removes the first element from the array equal to the given
value.",
+ syntax_example = "array_remove(array, element)",
+ sql_example = r#"```sql
+> select array_remove([1, 2, 2, 3, 2, 1, 4], 2);
++----------------------------------------------+
+| array_remove(List([1,2,2,3,2,1,4]),Int64(2)) |
++----------------------------------------------+
+| [1, 2, 3, 2, 1, 4] |
++----------------------------------------------+
+```"#,
+ argument(
+ name = "array",
+ description = "Array expression. Can be a constant, column, or
function, and any combination of array operators."
+ ),
+ argument(
+ name = "element",
+ description = "Element to be removed from the array."
+ )
+)]
#[derive(Debug)]
pub(super) struct ArrayRemove {
signature: Signature,
@@ -87,41 +108,10 @@ impl ScalarUDFImpl for ArrayRemove {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_array_remove_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_array_remove_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Removes the first element from the array equal to the given
value.",
-
- "array_remove(array, element)")
- .with_sql_example(
- r#"```sql
-> select array_remove([1, 2, 2, 3, 2, 1, 4], 2);
-+----------------------------------------------+
-| array_remove(List([1,2,2,3,2,1,4]),Int64(2)) |
-+----------------------------------------------+
-| [1, 2, 3, 2, 1, 4] |
-+----------------------------------------------+
-```"#,
- )
- .with_argument(
- "array",
- "Array expression. Can be a constant, column, or function, and
any combination of array operators.",
- )
- .with_argument(
- "element",
- "Element to be removed from the array.",
- )
- .build()
- })
-}
-
make_udf_expr_and_func!(
ArrayRemoveN,
array_remove_n,
@@ -130,6 +120,28 @@ make_udf_expr_and_func!(
array_remove_n_udf
);
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Removes the first `max` elements from the array equal to
the given value.",
+ syntax_example = "array_remove_n(array, element, max)",
+ sql_example = r#"```sql
+> select array_remove_n([1, 2, 2, 3, 2, 1, 4], 2, 2);
++---------------------------------------------------------+
+| array_remove_n(List([1,2,2,3,2,1,4]),Int64(2),Int64(2)) |
++---------------------------------------------------------+
+| [1, 3, 2, 1, 4] |
++---------------------------------------------------------+
+```"#,
+ argument(
+ name = "array",
+ description = "Array expression. Can be a constant, column, or
function, and any combination of array operators."
+ ),
+ argument(
+ name = "element",
+ description = "Element to be removed from the array."
+ ),
+ argument(name = "max", description = "Number of first occurrences to
remove.")
+)]
#[derive(Debug)]
pub(super) struct ArrayRemoveN {
signature: Signature,
@@ -175,43 +187,10 @@ impl ScalarUDFImpl for ArrayRemoveN {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_array_remove_n_doc())
+ self.doc()
}
}
-fn get_array_remove_n_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Removes the first `max` elements from the array equal to the
given value.",
-
- "array_remove_n(array, element, max)")
- .with_sql_example(
- r#"```sql
-> select array_remove_n([1, 2, 2, 3, 2, 1, 4], 2, 2);
-+---------------------------------------------------------+
-| array_remove_n(List([1,2,2,3,2,1,4]),Int64(2),Int64(2)) |
-+---------------------------------------------------------+
-| [1, 3, 2, 1, 4] |
-+---------------------------------------------------------+
-```"#,
- )
- .with_argument(
- "array",
- "Array expression. Can be a constant, column, or function, and
any combination of array operators.",
- )
- .with_argument(
- "element",
- "Element to be removed from the array.",
- )
- .with_argument(
- "max",
- "Number of first occurrences to remove.",
- )
- .build()
- })
-}
-
make_udf_expr_and_func!(
ArrayRemoveAll,
array_remove_all,
@@ -220,6 +199,27 @@ make_udf_expr_and_func!(
array_remove_all_udf
);
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Removes all elements from the array equal to the given
value.",
+ syntax_example = "array_remove_all(array, element)",
+ sql_example = r#"```sql
+> select array_remove_all([1, 2, 2, 3, 2, 1, 4], 2);
++--------------------------------------------------+
+| array_remove_all(List([1,2,2,3,2,1,4]),Int64(2)) |
++--------------------------------------------------+
+| [1, 3, 1, 4] |
++--------------------------------------------------+
+```"#,
+ argument(
+ name = "array",
+ description = "Array expression. Can be a constant, column, or
function, and any combination of array operators."
+ ),
+ argument(
+ name = "element",
+ description = "Element to be removed from the array."
+ )
+)]
#[derive(Debug)]
pub(super) struct ArrayRemoveAll {
signature: Signature,
@@ -265,39 +265,10 @@ impl ScalarUDFImpl for ArrayRemoveAll {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_array_remove_all_doc())
+ self.doc()
}
}
-fn get_array_remove_all_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Removes all elements from the array equal to the given
value.",
-
- "array_remove_all(array, element)")
- .with_sql_example(
- r#"```sql
-> select array_remove_all([1, 2, 2, 3, 2, 1, 4], 2);
-+--------------------------------------------------+
-| array_remove_all(List([1,2,2,3,2,1,4]),Int64(2)) |
-+--------------------------------------------------+
-| [1, 3, 1, 4] |
-+--------------------------------------------------+
-```"#,
- )
- .with_argument(
- "array",
- "Array expression. Can be a constant, column, or function, and
any combination of array operators.",
- )
- .with_argument(
- "element",
- "Element to be removed from the array.",
- )
- .build()
- })
-}
-
/// Array_remove SQL function
pub fn array_remove_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 2 {
diff --git a/datafusion/functions-nested/src/repeat.rs
b/datafusion/functions-nested/src/repeat.rs
index 2842b91a78..498781f5b3 100644
--- a/datafusion/functions-nested/src/repeat.rs
+++ b/datafusion/functions-nested/src/repeat.rs
@@ -29,12 +29,12 @@ use arrow_schema::DataType::{LargeList, List};
use arrow_schema::{DataType, Field};
use datafusion_common::cast::{as_int64_array, as_large_list_array,
as_list_array};
use datafusion_common::{exec_err, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
+use datafusion_macros::user_doc;
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
make_udf_expr_and_func!(
ArrayRepeat,
@@ -43,6 +43,34 @@ make_udf_expr_and_func!(
"returns an array containing element `count` times.", // doc
array_repeat_udf // internal function name
);
+
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Returns an array containing element `count` times.",
+ syntax_example = "array_repeat(element, count)",
+ sql_example = r#"```sql
+> select array_repeat(1, 3);
++---------------------------------+
+| array_repeat(Int64(1),Int64(3)) |
++---------------------------------+
+| [1, 1, 1] |
++---------------------------------+
+> select array_repeat([1, 2], 2);
++------------------------------------+
+| array_repeat(List([1,2]),Int64(2)) |
++------------------------------------+
+| [[1, 2], [1, 2]] |
++------------------------------------+
+```"#,
+ argument(
+ name = "element",
+ description = "Element expression. Can be a constant, column, or
function, and any combination of array operators."
+ ),
+ argument(
+ name = "count",
+ description = "Value of how many times to repeat the element."
+ )
+)]
#[derive(Debug)]
pub(super) struct ArrayRepeat {
signature: Signature,
@@ -91,47 +119,10 @@ impl ScalarUDFImpl for ArrayRepeat {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_array_repeat_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_array_repeat_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Returns an array containing element `count` times.",
-
- "array_repeat(element, count)")
- .with_sql_example(
- r#"```sql
-> select array_repeat(1, 3);
-+---------------------------------+
-| array_repeat(Int64(1),Int64(3)) |
-+---------------------------------+
-| [1, 1, 1] |
-+---------------------------------+
-> select array_repeat([1, 2], 2);
-+------------------------------------+
-| array_repeat(List([1,2]),Int64(2)) |
-+------------------------------------+
-| [[1, 2], [1, 2]] |
-+------------------------------------+
-```"#,
- )
- .with_argument(
- "element",
- "Element expression. Can be a constant, column, or function,
and any combination of array operators.",
- )
- .with_argument(
- "count",
- "Value of how many times to repeat the element.",
- )
- .build()
- })
-}
-
/// Array_repeat SQL function
pub fn array_repeat_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 2 {
diff --git a/datafusion/functions-nested/src/replace.rs
b/datafusion/functions-nested/src/replace.rs
index e971d97dbf..0902d1d036 100644
--- a/datafusion/functions-nested/src/replace.rs
+++ b/datafusion/functions-nested/src/replace.rs
@@ -27,16 +27,16 @@ use arrow_buffer::{BooleanBufferBuilder, NullBuffer,
OffsetBuffer};
use arrow_schema::Field;
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
+use datafusion_macros::user_doc;
use crate::utils::compare_element_to_list;
use crate::utils::make_scalar_function;
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
// Create static instances of ScalarUDFs for each function
make_udf_expr_and_func!(ArrayReplace,
@@ -58,6 +58,25 @@ make_udf_expr_and_func!(ArrayReplaceAll,
array_replace_all_udf
);
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Replaces the first occurrence of the specified element with
another specified element.",
+ syntax_example = "array_replace(array, from, to)",
+ sql_example = r#"```sql
+> select array_replace([1, 2, 2, 3, 2, 1, 4], 2, 5);
++--------------------------------------------------------+
+| array_replace(List([1,2,2,3,2,1,4]),Int64(2),Int64(5)) |
++--------------------------------------------------------+
+| [1, 5, 2, 3, 2, 1, 4] |
++--------------------------------------------------------+
+```"#,
+ argument(
+ name = "array",
+ description = "Array expression. Can be a constant, column, or
function, and any combination of array operators."
+ ),
+ argument(name = "from", description = "Initial element."),
+ argument(name = "to", description = "Final element.")
+)]
#[derive(Debug)]
pub(super) struct ArrayReplace {
signature: Signature,
@@ -103,45 +122,30 @@ impl ScalarUDFImpl for ArrayReplace {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_array_replace_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_array_replace_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Replaces the first occurrence of the specified element with
another specified element.",
-
- "array_replace(array, from, to)")
- .with_sql_example(
- r#"```sql
-> select array_replace([1, 2, 2, 3, 2, 1, 4], 2, 5);
-+--------------------------------------------------------+
-| array_replace(List([1,2,2,3,2,1,4]),Int64(2),Int64(5)) |
-+--------------------------------------------------------+
-| [1, 5, 2, 3, 2, 1, 4] |
-+--------------------------------------------------------+
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Replaces the first `max` occurrences of the specified
element with another specified element.",
+ syntax_example = "array_replace_n(array, from, to, max)",
+ sql_example = r#"```sql
+> select array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2);
++-------------------------------------------------------------------+
+| array_replace_n(List([1,2,2,3,2,1,4]),Int64(2),Int64(5),Int64(2)) |
++-------------------------------------------------------------------+
+| [1, 5, 5, 3, 2, 1, 4] |
++-------------------------------------------------------------------+
```"#,
- )
- .with_argument(
- "array",
- "Array expression. Can be a constant, column, or function, and
any combination of array operators.",
- )
- .with_argument(
- "from",
- "Initial element.",
- )
- .with_argument(
- "to",
- "Final element.",
- )
- .build()
- })
-}
-
+ argument(
+ name = "array",
+ description = "Array expression. Can be a constant, column, or
function, and any combination of array operators."
+ ),
+ argument(name = "from", description = "Initial element."),
+ argument(name = "to", description = "Final element."),
+ argument(name = "max", description = "Number of first occurrences to
replace.")
+)]
#[derive(Debug)]
pub(super) struct ArrayReplaceN {
signature: Signature,
@@ -187,47 +191,29 @@ impl ScalarUDFImpl for ArrayReplaceN {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_array_replace_n_doc())
+ self.doc()
}
}
-fn get_array_replace_n_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Replaces the first `max` occurrences of the specified element
with another specified element.",
-
- "array_replace_n(array, from, to, max)")
- .with_sql_example(
- r#"```sql
-> select array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2);
-+-------------------------------------------------------------------+
-| array_replace_n(List([1,2,2,3,2,1,4]),Int64(2),Int64(5),Int64(2)) |
-+-------------------------------------------------------------------+
-| [1, 5, 5, 3, 2, 1, 4] |
-+-------------------------------------------------------------------+
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Replaces all occurrences of the specified element with
another specified element.",
+ syntax_example = "array_replace_all(array, from, to)",
+ sql_example = r#"```sql
+> select array_replace_all([1, 2, 2, 3, 2, 1, 4], 2, 5);
++------------------------------------------------------------+
+| array_replace_all(List([1,2,2,3,2,1,4]),Int64(2),Int64(5)) |
++------------------------------------------------------------+
+| [1, 5, 5, 3, 5, 1, 4] |
++------------------------------------------------------------+
```"#,
- )
- .with_argument(
- "array",
- "Array expression. Can be a constant, column, or function, and
any combination of array operators.",
- )
- .with_argument(
- "from",
- "Initial element.",
- )
- .with_argument(
- "to",
- "Final element.",
- )
- .with_argument(
- "max",
- "Number of first occurrences to replace.",
- )
- .build()
- })
-}
-
+ argument(
+ name = "array",
+ description = "Array expression. Can be a constant, column, or
function, and any combination of array operators."
+ ),
+ argument(name = "from", description = "Initial element."),
+ argument(name = "to", description = "Final element.")
+)]
#[derive(Debug)]
pub(super) struct ArrayReplaceAll {
signature: Signature,
@@ -273,43 +259,10 @@ impl ScalarUDFImpl for ArrayReplaceAll {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_array_replace_all_doc())
+ self.doc()
}
}
-fn get_array_replace_all_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Replaces all occurrences of the specified element with
another specified element.",
-
- "array_replace_all(array, from, to)")
- .with_sql_example(
- r#"```sql
-> select array_replace_all([1, 2, 2, 3, 2, 1, 4], 2, 5);
-+------------------------------------------------------------+
-| array_replace_all(List([1,2,2,3,2,1,4]),Int64(2),Int64(5)) |
-+------------------------------------------------------------+
-| [1, 5, 5, 3, 5, 1, 4] |
-+------------------------------------------------------------+
-```"#,
- )
- .with_argument(
- "array",
- "Array expression. Can be a constant, column, or function, and
any combination of array operators.",
- )
- .with_argument(
- "from",
- "Initial element.",
- )
- .with_argument(
- "to",
- "Final element.",
- )
- .build()
- })
-}
-
/// For each element of `list_array[i]`, replaces up to `arr_n[i]` occurrences
/// of `from_array[i]`, `to_array[i]`.
///
diff --git a/datafusion/functions-nested/src/resize.rs
b/datafusion/functions-nested/src/resize.rs
index c9487dd818..8a4a88741c 100644
--- a/datafusion/functions-nested/src/resize.rs
+++ b/datafusion/functions-nested/src/resize.rs
@@ -27,12 +27,12 @@ use arrow_schema::DataType::{FixedSizeList, LargeList,
List};
use arrow_schema::{DataType, FieldRef};
use datafusion_common::cast::{as_int64_array, as_large_list_array,
as_list_array};
use datafusion_common::{exec_err, internal_datafusion_err, Result,
ScalarValue};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
+use datafusion_macros::user_doc;
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
make_udf_expr_and_func!(
ArrayResize,
@@ -42,6 +42,28 @@ make_udf_expr_and_func!(
array_resize_udf
);
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Resizes the list to contain size elements. Initializes new
elements with value or empty if value is not set.",
+ syntax_example = "array_resize(array, size, value)",
+ sql_example = r#"```sql
+> select array_resize([1, 2, 3], 5, 0);
++-------------------------------------+
+| array_resize(List([1,2,3],5,0)) |
++-------------------------------------+
+| [1, 2, 3, 0, 0] |
++-------------------------------------+
+```"#,
+ argument(
+ name = "array",
+ description = "Array expression. Can be a constant, column, or
function, and any combination of array operators."
+ ),
+ argument(name = "size", description = "New size of given array."),
+ argument(
+ name = "value",
+ description = "Defines new elements' value or empty if value is not
set."
+ )
+)]
#[derive(Debug)]
pub(super) struct ArrayResize {
signature: Signature,
@@ -93,45 +115,10 @@ impl ScalarUDFImpl for ArrayResize {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_array_resize_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_array_resize_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Resizes the list to contain size elements. Initializes new
elements with value or empty if value is not set.",
-
- "array_resize(array, size, value)")
- .with_sql_example(
- r#"```sql
-> select array_resize([1, 2, 3], 5, 0);
-+-------------------------------------+
-| array_resize(List([1,2,3],5,0)) |
-+-------------------------------------+
-| [1, 2, 3, 0, 0] |
-+-------------------------------------+
-```"#,
- )
- .with_argument(
- "array",
- "Array expression. Can be a constant, column, or function, and
any combination of array operators.",
- )
- .with_argument(
- "size",
- "New size of given array.",
- )
- .with_argument(
- "value",
- "Defines new elements' value or empty if value is not set.",
- )
- .build()
- })
-}
-
/// array_resize SQL function
pub(crate) fn array_resize_inner(arg: &[ArrayRef]) -> Result<ArrayRef> {
if arg.len() < 2 || arg.len() > 3 {
diff --git a/datafusion/functions-nested/src/reverse.rs
b/datafusion/functions-nested/src/reverse.rs
index aa898268d1..b394c1afea 100644
--- a/datafusion/functions-nested/src/reverse.rs
+++ b/datafusion/functions-nested/src/reverse.rs
@@ -25,12 +25,12 @@ use arrow_schema::DataType::{LargeList, List, Null};
use arrow_schema::{DataType, FieldRef};
use datafusion_common::cast::{as_large_list_array, as_list_array};
use datafusion_common::{exec_err, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
+use datafusion_macros::user_doc;
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
make_udf_expr_and_func!(
ArrayReverse,
@@ -40,6 +40,23 @@ make_udf_expr_and_func!(
array_reverse_udf
);
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Returns the array with the order of the elements reversed.",
+ syntax_example = "array_reverse(array)",
+ sql_example = r#"```sql
+> select array_reverse([1, 2, 3, 4]);
++------------------------------------------------------------+
+| array_reverse(List([1, 2, 3, 4])) |
++------------------------------------------------------------+
+| [4, 3, 2, 1] |
++------------------------------------------------------------+
+```"#,
+ argument(
+ name = "array",
+ description = "Array expression. Can be a constant, column, or
function, and any combination of array operators."
+ )
+)]
#[derive(Debug)]
pub(super) struct ArrayReverse {
signature: Signature,
@@ -85,37 +102,10 @@ impl ScalarUDFImpl for ArrayReverse {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_array_reverse_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_array_reverse_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Returns the array with the order of the elements reversed.",
-
- "array_reverse(array)")
- .with_sql_example(
- r#"```sql
-> select array_reverse([1, 2, 3, 4]);
-+------------------------------------------------------------+
-| array_reverse(List([1, 2, 3, 4])) |
-+------------------------------------------------------------+
-| [4, 3, 2, 1] |
-+------------------------------------------------------------+
-```"#,
- )
- .with_argument(
- "array",
- "Array expression. Can be a constant, column, or function, and
any combination of array operators.",
- )
- .build()
- })
-}
-
/// array_reverse SQL function
pub fn array_reverse_inner(arg: &[ArrayRef]) -> Result<ArrayRef> {
if arg.len() != 1 {
diff --git a/datafusion/functions-nested/src/set_ops.rs
b/datafusion/functions-nested/src/set_ops.rs
index faefa45e92..202330715b 100644
--- a/datafusion/functions-nested/src/set_ops.rs
+++ b/datafusion/functions-nested/src/set_ops.rs
@@ -27,15 +27,15 @@ use arrow::row::{RowConverter, SortField};
use arrow_schema::DataType::{FixedSizeList, LargeList, List, Null};
use datafusion_common::cast::{as_large_list_array, as_list_array};
use datafusion_common::{exec_err, internal_err, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
+use datafusion_macros::user_doc;
use itertools::Itertools;
use std::any::Any;
use std::collections::HashSet;
use std::fmt::{Display, Formatter};
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
// Create static instances of ScalarUDFs for each function
make_udf_expr_and_func!(
@@ -62,6 +62,33 @@ make_udf_expr_and_func!(
array_distinct_udf
);
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Returns an array of elements that are present in both
arrays (all elements from both arrays) with out duplicates.",
+ syntax_example = "array_union(array1, array2)",
+ sql_example = r#"```sql
+> select array_union([1, 2, 3, 4], [5, 6, 3, 4]);
++----------------------------------------------------+
+| array_union([1, 2, 3, 4], [5, 6, 3, 4]); |
++----------------------------------------------------+
+| [1, 2, 3, 4, 5, 6] |
++----------------------------------------------------+
+> select array_union([1, 2, 3, 4], [5, 6, 7, 8]);
++----------------------------------------------------+
+| array_union([1, 2, 3, 4], [5, 6, 7, 8]); |
++----------------------------------------------------+
+| [1, 2, 3, 4, 5, 6, 7, 8] |
++----------------------------------------------------+
+```"#,
+ argument(
+ name = "array1",
+ description = "Array expression. Can be a constant, column, or
function, and any combination of array operators."
+ ),
+ argument(
+ name = "array2",
+ description = "Array expression. Can be a constant, column, or
function, and any combination of array operators."
+ )
+)]
#[derive(Debug)]
pub(super) struct ArrayUnion {
signature: Signature,
@@ -111,47 +138,37 @@ impl ScalarUDFImpl for ArrayUnion {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_array_union_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_array_union_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Returns an array of elements that are present in both arrays
(all elements from both arrays) with out duplicates.",
-
- "array_union(array1, array2)")
- .with_sql_example(
- r#"```sql
-> select array_union([1, 2, 3, 4], [5, 6, 3, 4]);
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Returns an array of elements in the intersection of array1
and array2.",
+ syntax_example = "array_intersect(array1, array2)",
+ sql_example = r#"```sql
+> select array_intersect([1, 2, 3, 4], [5, 6, 3, 4]);
+----------------------------------------------------+
-| array_union([1, 2, 3, 4], [5, 6, 3, 4]); |
+| array_intersect([1, 2, 3, 4], [5, 6, 3, 4]); |
+----------------------------------------------------+
-| [1, 2, 3, 4, 5, 6] |
+| [3, 4] |
+----------------------------------------------------+
-> select array_union([1, 2, 3, 4], [5, 6, 7, 8]);
+> select array_intersect([1, 2, 3, 4], [5, 6, 7, 8]);
+----------------------------------------------------+
-| array_union([1, 2, 3, 4], [5, 6, 7, 8]); |
+| array_intersect([1, 2, 3, 4], [5, 6, 7, 8]); |
+----------------------------------------------------+
-| [1, 2, 3, 4, 5, 6, 7, 8] |
+| [] |
+----------------------------------------------------+
```"#,
- )
- .with_argument(
- "array1",
- "Array expression. Can be a constant, column, or function, and
any combination of array operators.",
- )
- .with_argument(
- "array2",
- "Array expression. Can be a constant, column, or function, and
any combination of array operators.",
- )
- .build()
- })
-}
-
+ argument(
+ name = "array1",
+ description = "Array expression. Can be a constant, column, or
function, and any combination of array operators."
+ ),
+ argument(
+ name = "array2",
+ description = "Array expression. Can be a constant, column, or
function, and any combination of array operators."
+ )
+)]
#[derive(Debug)]
pub(super) struct ArrayIntersect {
signature: Signature,
@@ -201,45 +218,27 @@ impl ScalarUDFImpl for ArrayIntersect {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_array_intersect_doc())
+ self.doc()
}
}
-fn get_array_intersect_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Returns an array of elements in the intersection of array1
and array2.",
-
- "array_intersect(array1, array2)")
- .with_sql_example(
- r#"```sql
-> select array_intersect([1, 2, 3, 4], [5, 6, 3, 4]);
-+----------------------------------------------------+
-| array_intersect([1, 2, 3, 4], [5, 6, 3, 4]); |
-+----------------------------------------------------+
-| [3, 4] |
-+----------------------------------------------------+
-> select array_intersect([1, 2, 3, 4], [5, 6, 7, 8]);
-+----------------------------------------------------+
-| array_intersect([1, 2, 3, 4], [5, 6, 7, 8]); |
-+----------------------------------------------------+
-| [] |
-+----------------------------------------------------+
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Returns distinct values from the array after removing
duplicates.",
+ syntax_example = "array_distinct(array)",
+ sql_example = r#"```sql
+> select array_distinct([1, 3, 2, 3, 1, 2, 4]);
++---------------------------------+
+| array_distinct(List([1,2,3,4])) |
++---------------------------------+
+| [1, 2, 3, 4] |
++---------------------------------+
```"#,
- )
- .with_argument(
- "array1",
- "Array expression. Can be a constant, column, or function, and
any combination of array operators.",
- )
- .with_argument(
- "array2",
- "Array expression. Can be a constant, column, or function, and
any combination of array operators.",
- )
- .build()
- })
-}
-
+ argument(
+ name = "array",
+ description = "Array expression. Can be a constant, column, or
function, and any combination of array operators."
+ )
+)]
#[derive(Debug)]
pub(super) struct ArrayDistinct {
signature: Signature,
@@ -296,35 +295,10 @@ impl ScalarUDFImpl for ArrayDistinct {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_array_distinct_doc())
+ self.doc()
}
}
-fn get_array_distinct_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Returns distinct values from the array after removing
duplicates.",
-
- "array_distinct(array)")
- .with_sql_example(
- r#"```sql
-> select array_distinct([1, 3, 2, 3, 1, 2, 4]);
-+---------------------------------+
-| array_distinct(List([1,2,3,4])) |
-+---------------------------------+
-| [1, 2, 3, 4] |
-+---------------------------------+
-```"#,
- )
- .with_argument(
- "array",
- "Array expression. Can be a constant, column, or function, and
any combination of array operators.",
- )
- .build()
- })
-}
-
/// array_distinct SQL function
/// example: from list [1, 3, 2, 3, 1, 2, 4] to [1, 2, 3, 4]
fn array_distinct_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
diff --git a/datafusion/functions-nested/src/sort.rs
b/datafusion/functions-nested/src/sort.rs
index 043fedd89b..0c5309e545 100644
--- a/datafusion/functions-nested/src/sort.rs
+++ b/datafusion/functions-nested/src/sort.rs
@@ -25,12 +25,12 @@ use arrow_schema::DataType::{FixedSizeList, LargeList,
List};
use arrow_schema::{DataType, Field, SortOptions};
use datafusion_common::cast::{as_list_array, as_string_array};
use datafusion_common::{exec_err, Result};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
+use datafusion_macros::user_doc;
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
make_udf_expr_and_func!(
ArraySort,
@@ -40,6 +40,31 @@ make_udf_expr_and_func!(
array_sort_udf
);
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Sort array.",
+ syntax_example = "array_sort(array, desc, nulls_first)",
+ sql_example = r#"```sql
+> select array_sort([3, 1, 2]);
++-----------------------------+
+| array_sort(List([3,1,2])) |
++-----------------------------+
+| [1, 2, 3] |
++-----------------------------+
+```"#,
+ argument(
+ name = "array",
+ description = "Array expression. Can be a constant, column, or
function, and any combination of array operators."
+ ),
+ argument(
+ name = "desc",
+ description = "Whether to sort in descending order(`ASC` or `DESC`)."
+ ),
+ argument(
+ name = "nulls_first",
+ description = "Whether to sort nulls first(`NULLS FIRST` or `NULLS
LAST`)."
+ )
+)]
#[derive(Debug)]
pub(super) struct ArraySort {
signature: Signature,
@@ -96,45 +121,10 @@ impl ScalarUDFImpl for ArraySort {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_array_sort_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_array_sort_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Sort array.",
-
- "array_sort(array, desc, nulls_first)")
- .with_sql_example(
- r#"```sql
-> select array_sort([3, 1, 2]);
-+-----------------------------+
-| array_sort(List([3,1,2])) |
-+-----------------------------+
-| [1, 2, 3] |
-+-----------------------------+
-```"#,
- )
- .with_argument(
- "array",
- "Array expression. Can be a constant, column, or function, and
any combination of array operators.",
- )
- .with_argument(
- "desc",
- "Whether to sort in descending order(`ASC` or `DESC`).",
- )
- .with_argument(
- "nulls_first",
- "Whether to sort nulls first(`NULLS FIRST` or `NULLS LAST`).",
- )
- .build()
- })
-}
-
/// Array_sort SQL function
pub fn array_sort_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.is_empty() || args.len() > 3 {
diff --git a/datafusion/functions-nested/src/string.rs
b/datafusion/functions-nested/src/string.rs
index 9288b374da..ee022053cf 100644
--- a/datafusion/functions-nested/src/string.rs
+++ b/datafusion/functions-nested/src/string.rs
@@ -42,13 +42,13 @@ use arrow_schema::DataType::{
};
use datafusion_common::cast::{as_large_list_array, as_list_array};
use datafusion_common::exec_err;
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_functions::strings::StringArrayType;
use datafusion_functions::{downcast_arg, downcast_named_arg};
-use std::sync::{Arc, OnceLock};
+use datafusion_macros::user_doc;
+use std::sync::Arc;
macro_rules! call_array_function {
($DATATYPE:expr, false) => {
@@ -121,6 +121,29 @@ make_udf_expr_and_func!(
"converts each element to its text representation.", // doc
array_to_string_udf // internal function name
);
+
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Converts each element to its text representation.",
+ syntax_example = "array_to_string(array, delimiter[, null_string])",
+ sql_example = r#"```sql
+> select array_to_string([[1, 2, 3, 4], [5, 6, 7, 8]], ',');
++----------------------------------------------------+
+| array_to_string(List([1,2,3,4,5,6,7,8]),Utf8(",")) |
++----------------------------------------------------+
+| 1,2,3,4,5,6,7,8 |
++----------------------------------------------------+
+```"#,
+ argument(
+ name = "array",
+ description = "Array expression. Can be a constant, column, or
function, and any combination of array operators."
+ ),
+ argument(name = "delimiter", description = "Array element separator."),
+ argument(
+ name = "null_string",
+ description = "Optional. String to replace null values in the array.
If not provided, nulls will be handled by default behavior."
+ )
+)]
#[derive(Debug)]
pub(super) struct ArrayToString {
signature: Signature,
@@ -175,45 +198,10 @@ impl ScalarUDFImpl for ArrayToString {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_array_to_string_doc())
+ self.doc()
}
}
-static DOCUMENTATION_ARRAY_TO_STRING: OnceLock<Documentation> =
OnceLock::new();
-
-fn get_array_to_string_doc() -> &'static Documentation {
- DOCUMENTATION_ARRAY_TO_STRING.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Converts each element to its text representation.",
-
- "array_to_string(array, delimiter[, null_string])")
- .with_sql_example(
- r#"```sql
-> select array_to_string([[1, 2, 3, 4], [5, 6, 7, 8]], ',');
-+----------------------------------------------------+
-| array_to_string(List([1,2,3,4,5,6,7,8]),Utf8(",")) |
-+----------------------------------------------------+
-| 1,2,3,4,5,6,7,8 |
-+----------------------------------------------------+
-```"#,
- )
- .with_argument(
- "array",
- "Array expression. Can be a constant, column, or function, and
any combination of array operators.",
- )
- .with_argument(
- "delimiter",
- "Array element separator.",
- )
- .with_argument(
- "null_string",
- "Optional. String to replace null values in the array. If not
provided, nulls will be handled by default behavior.",
- )
- .build()
- })
-}
-
make_udf_expr_and_func!(
StringToArray,
string_to_array,
@@ -221,6 +209,32 @@ make_udf_expr_and_func!(
"splits a `string` based on a `delimiter` and returns an array of parts.
Any parts matching the optional `null_string` will be replaced with `NULL`", //
doc
string_to_array_udf // internal function name
);
+
+#[user_doc(
+ doc_section(label = "Array Functions"),
+ description = "Splits a string into an array of substrings based on a
delimiter. Any substrings matching the optional `null_str` argument are
replaced with NULL.",
+ syntax_example = "string_to_array(str, delimiter[, null_str])",
+ sql_example = r#"```sql
+> select string_to_array('abc##def', '##');
++-----------------------------------+
+| string_to_array(Utf8('abc##def')) |
++-----------------------------------+
+| ['abc', 'def'] |
++-----------------------------------+
+> select string_to_array('abc def', ' ', 'def');
++---------------------------------------------+
+| string_to_array(Utf8('abc def'), Utf8(' '), Utf8('def')) |
++---------------------------------------------+
+| ['abc', NULL] |
++---------------------------------------------+
+```"#,
+ argument(name = "str", description = "String expression to split."),
+ argument(name = "delimiter", description = "Delimiter string to split
on."),
+ argument(
+ name = "null_str",
+ description = "Substring values to be replaced with `NULL`."
+ )
+)]
#[derive(Debug)]
pub(super) struct StringToArray {
signature: Signature,
@@ -284,51 +298,10 @@ impl ScalarUDFImpl for StringToArray {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_string_to_array_doc())
+ self.doc()
}
}
-static DOCUMENTATION_STRING_TO_ARRAY: OnceLock<Documentation> =
OnceLock::new();
-
-fn get_string_to_array_doc() -> &'static Documentation {
- DOCUMENTATION_STRING_TO_ARRAY.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_ARRAY,
- "Splits a string into an array of substrings based on a
delimiter. Any substrings matching the optional `null_str` argument are
replaced with NULL.",
-
- "string_to_array(str, delimiter[, null_str])")
- .with_sql_example(
- r#"```sql
-> select string_to_array('abc##def', '##');
-+-----------------------------------+
-| string_to_array(Utf8('abc##def')) |
-+-----------------------------------+
-| ['abc', 'def'] |
-+-----------------------------------+
-> select string_to_array('abc def', ' ', 'def');
-+---------------------------------------------+
-| string_to_array(Utf8('abc def'), Utf8(' '), Utf8('def')) |
-+---------------------------------------------+
-| ['abc', NULL] |
-+---------------------------------------------+
-```"#,
- )
- .with_argument(
- "str",
- "String expression to split.",
- )
- .with_argument(
- "delimiter",
- "Delimiter string to split on.",
- )
- .with_argument(
- "null_str",
- "Substring values to be replaced with `NULL`.",
- )
- .build()
- })
-}
-
/// Array_to_string SQL function
pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() < 2 || args.len() > 3 {
diff --git a/docs/source/user-guide/sql/scalar_functions.md
b/docs/source/user-guide/sql/scalar_functions.md
index 79fe440f37..4cf5ff4b71 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -2936,25 +2936,32 @@ _Alias of [array_position](#array_position)._
### `array_intersect`
-Returns distinct values from the array after removing duplicates.
+Returns an array of elements in the intersection of array1 and array2.
```
-array_distinct(array)
+array_intersect(array1, array2)
```
#### Arguments
-- **array**: Array expression. Can be a constant, column, or function, and any
combination of array operators.
+- **array1**: Array expression. Can be a constant, column, or function, and
any combination of array operators.
+- **array2**: Array expression. Can be a constant, column, or function, and
any combination of array operators.
#### Example
```sql
-> select array_distinct([1, 3, 2, 3, 1, 2, 4]);
-+---------------------------------+
-| array_distinct(List([1,2,3,4])) |
-+---------------------------------+
-| [1, 2, 3, 4] |
-+---------------------------------+
+> select array_intersect([1, 2, 3, 4], [5, 6, 3, 4]);
++----------------------------------------------------+
+| array_intersect([1, 2, 3, 4], [5, 6, 3, 4]); |
++----------------------------------------------------+
+| [3, 4] |
++----------------------------------------------------+
+> select array_intersect([1, 2, 3, 4], [5, 6, 7, 8]);
++----------------------------------------------------+
+| array_intersect([1, 2, 3, 4], [5, 6, 7, 8]); |
++----------------------------------------------------+
+| [] |
++----------------------------------------------------+
```
#### Aliases
@@ -3217,10 +3224,10 @@ array_remove(array, element)
### `array_remove_all`
-Removes the first element from the array equal to the given value.
+Removes all elements from the array equal to the given value.
```
-array_remove(array, element)
+array_remove_all(array, element)
```
#### Arguments
@@ -3231,12 +3238,12 @@ array_remove(array, element)
#### Example
```sql
-> select array_remove([1, 2, 2, 3, 2, 1, 4], 2);
-+----------------------------------------------+
-| array_remove(List([1,2,2,3,2,1,4]),Int64(2)) |
-+----------------------------------------------+
-| [1, 2, 3, 2, 1, 4] |
-+----------------------------------------------+
+> select array_remove_all([1, 2, 2, 3, 2, 1, 4], 2);
++--------------------------------------------------+
+| array_remove_all(List([1,2,2,3,2,1,4]),Int64(2)) |
++--------------------------------------------------+
+| [1, 3, 1, 4] |
++--------------------------------------------------+
```
#### Aliases
@@ -3245,26 +3252,27 @@ array_remove(array, element)
### `array_remove_n`
-Removes the first element from the array equal to the given value.
+Removes the first `max` elements from the array equal to the given value.
```
-array_remove(array, element)
+array_remove_n(array, element, max)
```
#### Arguments
- **array**: Array expression. Can be a constant, column, or function, and any
combination of array operators.
- **element**: Element to be removed from the array.
+- **max**: Number of first occurrences to remove.
#### Example
```sql
-> select array_remove([1, 2, 2, 3, 2, 1, 4], 2);
-+----------------------------------------------+
-| array_remove(List([1,2,2,3,2,1,4]),Int64(2)) |
-+----------------------------------------------+
-| [1, 2, 3, 2, 1, 4] |
-+----------------------------------------------+
+> select array_remove_n([1, 2, 2, 3, 2, 1, 4], 2, 2);
++---------------------------------------------------------+
+| array_remove_n(List([1,2,2,3,2,1,4]),Int64(2),Int64(2)) |
++---------------------------------------------------------+
+| [1, 3, 2, 1, 4] |
++---------------------------------------------------------+
```
#### Aliases
@@ -3307,10 +3315,10 @@ array_repeat(element, count)
### `array_replace`
-Replaces the first `max` occurrences of the specified element with another
specified element.
+Replaces the first occurrence of the specified element with another specified
element.
```
-array_replace_n(array, from, to, max)
+array_replace(array, from, to)
```
#### Arguments
@@ -3318,17 +3326,16 @@ array_replace_n(array, from, to, max)
- **array**: Array expression. Can be a constant, column, or function, and any
combination of array operators.
- **from**: Initial element.
- **to**: Final element.
-- **max**: Number of first occurrences to replace.
#### Example
```sql
-> select array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2);
-+-------------------------------------------------------------------+
-| array_replace_n(List([1,2,2,3,2,1,4]),Int64(2),Int64(5),Int64(2)) |
-+-------------------------------------------------------------------+
-| [1, 5, 5, 3, 2, 1, 4] |
-+-------------------------------------------------------------------+
+> select array_replace([1, 2, 2, 3, 2, 1, 4], 2, 5);
++--------------------------------------------------------+
+| array_replace(List([1,2,2,3,2,1,4]),Int64(2),Int64(5)) |
++--------------------------------------------------------+
+| [1, 5, 2, 3, 2, 1, 4] |
++--------------------------------------------------------+
```
#### Aliases
@@ -3337,10 +3344,10 @@ array_replace_n(array, from, to, max)
### `array_replace_all`
-Replaces the first `max` occurrences of the specified element with another
specified element.
+Replaces all occurrences of the specified element with another specified
element.
```
-array_replace_n(array, from, to, max)
+array_replace_all(array, from, to)
```
#### Arguments
@@ -3348,17 +3355,16 @@ array_replace_n(array, from, to, max)
- **array**: Array expression. Can be a constant, column, or function, and any
combination of array operators.
- **from**: Initial element.
- **to**: Final element.
-- **max**: Number of first occurrences to replace.
#### Example
```sql
-> select array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2);
-+-------------------------------------------------------------------+
-| array_replace_n(List([1,2,2,3,2,1,4]),Int64(2),Int64(5),Int64(2)) |
-+-------------------------------------------------------------------+
-| [1, 5, 5, 3, 2, 1, 4] |
-+-------------------------------------------------------------------+
+> select array_replace_all([1, 2, 2, 3, 2, 1, 4], 2, 5);
++------------------------------------------------------------+
+| array_replace_all(List([1,2,2,3,2,1,4]),Int64(2),Int64(5)) |
++------------------------------------------------------------+
+| [1, 5, 5, 3, 5, 1, 4] |
++------------------------------------------------------------+
```
#### Aliases
@@ -3543,25 +3549,32 @@ array_to_string(array, delimiter[, null_string])
### `array_union`
-Returns distinct values from the array after removing duplicates.
+Returns an array of elements that are present in either array (all elements
from both arrays) without duplicates.
```
-array_distinct(array)
+array_union(array1, array2)
```
#### Arguments
-- **array**: Array expression. Can be a constant, column, or function, and any
combination of array operators.
+- **array1**: Array expression. Can be a constant, column, or function, and
any combination of array operators.
+- **array2**: Array expression. Can be a constant, column, or function, and
any combination of array operators.
#### Example
```sql
-> select array_distinct([1, 3, 2, 3, 1, 2, 4]);
-+---------------------------------+
-| array_distinct(List([1,2,3,4])) |
-+---------------------------------+
-| [1, 2, 3, 4] |
-+---------------------------------+
+> select array_union([1, 2, 3, 4], [5, 6, 3, 4]);
++----------------------------------------------------+
+| array_union([1, 2, 3, 4], [5, 6, 3, 4]); |
++----------------------------------------------------+
+| [1, 2, 3, 4, 5, 6] |
++----------------------------------------------------+
+> select array_union([1, 2, 3, 4], [5, 6, 7, 8]);
++----------------------------------------------------+
+| array_union([1, 2, 3, 4], [5, 6, 7, 8]); |
++----------------------------------------------------+
+| [1, 2, 3, 4, 5, 6, 7, 8] |
++----------------------------------------------------+
```
#### Aliases
@@ -3657,9 +3670,9 @@ generate_series(start, stop, step)
#### Arguments
-- **start**: start of the series. Ints, timestamps, dates or string types that
can be coerced to Date32 are supported.
-- **end**: end of the series (included). Type must be the same as start.
-- **step**: increase by step (can not be 0). Steps less than a day are
supported only for timestamp ranges.
+- **start**: Start of the series. Ints, timestamps, dates or string types that
can be coerced to Date32 are supported.
+- **end**: End of the series (included). Type must be the same as start.
+- **step**: Increase by step (can not be 0). Steps less than a day are
supported only for timestamp ranges.
#### Example
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]