This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new a203c2b167 doc-gen: migrate scalar functions (datetime) documentation
1/2 (#13920)
a203c2b167 is described below
commit a203c2b167123eba4a2f36df4f954a6fecf536a5
Author: Ian Lai <[email protected]>
AuthorDate: Sun Dec 29 21:11:35 2024 +0800
doc-gen: migrate scalar functions (datetime) documentation 1/2 (#13920)
* doc-gen: migrate scalar functions (datetime) documentation 1/2
* fix: fix typo and update function docs
---------
Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
---
datafusion/functions/src/datetime/current_date.rs | 30 ++----
datafusion/functions/src/datetime/current_time.rs | 30 ++----
datafusion/functions/src/datetime/date_bin.rs | 118 ++++++++++-----------
datafusion/functions/src/datetime/date_part.rs | 78 +++++++-------
datafusion/functions/src/datetime/date_trunc.rs | 60 +++++------
datafusion/functions/src/datetime/from_unixtime.rs | 49 ++++-----
docs/source/user-guide/sql/scalar_functions.md | 28 +++--
7 files changed, 175 insertions(+), 218 deletions(-)
diff --git a/datafusion/functions/src/datetime/current_date.rs
b/datafusion/functions/src/datetime/current_date.rs
index 97d97939d3..868cbe23d6 100644
--- a/datafusion/functions/src/datetime/current_date.rs
+++ b/datafusion/functions/src/datetime/current_date.rs
@@ -22,13 +22,21 @@ use arrow::datatypes::DataType::Date32;
use chrono::{Datelike, NaiveDate};
use datafusion_common::{internal_err, Result, ScalarValue};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
};
-use std::sync::OnceLock;
+use datafusion_macros::user_doc;
+#[user_doc(
+ doc_section(label = "Time and Date Functions"),
+ description = r#"
+Returns the current UTC date.
+
+The `current_date()` return value is determined at query time and will return
the same date, no matter when in the query plan the function executes.
+"#,
+ syntax_example = "current_date()"
+)]
#[derive(Debug)]
pub struct CurrentDateFunc {
signature: Signature,
@@ -105,22 +113,6 @@ impl ScalarUDFImpl for CurrentDateFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_current_date_doc())
+ self.doc()
}
}
-
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_current_date_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_DATETIME,
- r#"
-Returns the current UTC date.
-
-The `current_date()` return value is determined at query time and will return
the same date, no matter when in the query plan the function executes.
-"#,
- "current_date()")
- .build()
- })
-}
diff --git a/datafusion/functions/src/datetime/current_time.rs
b/datafusion/functions/src/datetime/current_time.rs
index 1cd39e5777..142184508e 100644
--- a/datafusion/functions/src/datetime/current_time.rs
+++ b/datafusion/functions/src/datetime/current_time.rs
@@ -19,15 +19,23 @@ use arrow::datatypes::DataType;
use arrow::datatypes::DataType::Time64;
use arrow::datatypes::TimeUnit::Nanosecond;
use std::any::Any;
-use std::sync::OnceLock;
use datafusion_common::{internal_err, Result, ScalarValue};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
};
+use datafusion_macros::user_doc;
+#[user_doc(
+ doc_section(label = "Time and Date Functions"),
+ description = r#"
+Returns the current UTC time.
+
+The `current_time()` return value is determined at query time and will return
the same time, no matter when in the query plan the function executes.
+"#,
+ syntax_example = "current_time()"
+)]
#[derive(Debug)]
pub struct CurrentTimeFunc {
signature: Signature,
@@ -93,22 +101,6 @@ impl ScalarUDFImpl for CurrentTimeFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_current_time_doc())
+ self.doc()
}
}
-
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_current_time_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_DATETIME,
- r#"
-Returns the current UTC time.
-
-The `current_time()` return value is determined at query time and will return
the same time, no matter when in the query plan the function executes.
-"#,
- "current_time()")
- .build()
- })
-}
diff --git a/datafusion/functions/src/datetime/date_bin.rs
b/datafusion/functions/src/datetime/date_bin.rs
index bb3f2177b9..a288693699 100644
--- a/datafusion/functions/src/datetime/date_bin.rs
+++ b/datafusion/functions/src/datetime/date_bin.rs
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
use arrow::array::temporal_conversions::NANOSECONDS;
use arrow::array::types::{
@@ -37,10 +37,64 @@ use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
TIMEZONE_WILDCARD,
};
+use datafusion_macros::user_doc;
use chrono::{DateTime, Datelike, Duration, Months, TimeDelta, Utc};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
+#[user_doc(
+ doc_section(label = "Time and Date Functions"),
+ description = r#"
+Calculates time intervals and returns the start of the interval nearest to the
specified timestamp. Use `date_bin` to downsample time series data by grouping
rows into time-based "bins" or "windows" and applying an aggregate or selector
function to each window.
+
+For example, if you "bin" or "window" data into 15 minute intervals, an input
timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15
minute bin it is in: `2023-01-01T18:15:00Z`.
+"#,
+ syntax_example = "date_bin(interval, expression, origin-timestamp)",
+ sql_example = r#"```sql
+-- Bin the timestamp into 1 day intervals
+> SELECT date_bin(interval '1 day', time) as bin
+FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
++---------------------+
+| bin |
++---------------------+
+| 2023-01-01T00:00:00 |
+| 2023-01-03T00:00:00 |
++---------------------+
+2 row(s) fetched.
+
+-- Bin the timestamp into 1 day intervals starting at 3AM on 2023-01-01
+> SELECT date_bin(interval '1 day', time, '2023-01-01T03:00:00') as bin
+FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
++---------------------+
+| bin |
++---------------------+
+| 2023-01-01T03:00:00 |
+| 2023-01-03T03:00:00 |
++---------------------+
+2 row(s) fetched.
+```"#,
+ argument(name = "interval", description = "Bin interval."),
+ argument(
+ name = "expression",
+ description = "Time expression to operate on. Can be a constant,
column, or function."
+ ),
+ argument(
+ name = "origin-timestamp",
+ description = r#"Optional. Starting point used to determine bin
boundaries. If not specified, defaults to 1970-01-01T00:00:00Z (the UNIX epoch in
UTC). The following intervals are supported:
+
+ - nanoseconds
+ - microseconds
+ - milliseconds
+ - seconds
+ - minutes
+ - hours
+ - days
+ - weeks
+ - months
+ - years
+ - century
+"#
+ )
+)]
#[derive(Debug)]
pub struct DateBinFunc {
signature: Signature,
@@ -169,68 +223,10 @@ impl ScalarUDFImpl for DateBinFunc {
}
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_date_bin_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_date_bin_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_DATETIME,
- r#"
-Calculates time intervals and returns the start of the interval nearest to the
specified timestamp. Use `date_bin` to downsample time series data by grouping
rows into time-based "bins" or "windows" and applying an aggregate or selector
function to each window.
-
-For example, if you "bin" or "window" data into 15 minute intervals, an input
timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15
minute bin it is in: `2023-01-01T18:15:00Z`.
-"#,
- "date_bin(interval, expression, origin-timestamp)")
- .with_sql_example(r#"```sql
--- Bin the timestamp into 1 day intervals
-> SELECT date_bin(interval '1 day', time) as bin
-FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
-+---------------------+
-| bin |
-+---------------------+
-| 2023-01-01T00:00:00 |
-| 2023-01-03T00:00:00 |
-+---------------------+
-2 row(s) fetched.
-
--- Bin the timestamp into 1 day intervals starting at 3AM on 2023-01-01
-> SELECT date_bin(interval '1 day', time, '2023-01-01T03:00:00') as bin
-FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
-+---------------------+
-| bin |
-+---------------------+
-| 2023-01-01T03:00:00 |
-| 2023-01-03T03:00:00 |
-+---------------------+
-2 row(s) fetched.
-```
-"#)
- .with_argument("interval", "Bin interval.")
- .with_argument("expression", "Time expression to operate on. Can
be a constant, column, or function.")
- .with_argument("origin-timestamp", "Optional. Starting point used
to determine bin boundaries. If not specified defaults 1970-01-01T00:00:00Z
(the UNIX epoch in UTC).
-
-The following intervals are supported:
-
-- nanoseconds
-- microseconds
-- milliseconds
-- seconds
-- minutes
-- hours
-- days
-- weeks
-- months
-- years
-- century
-")
- .build()
- })
-}
-
enum Interval {
Nanoseconds(i64),
Months(i64),
diff --git a/datafusion/functions/src/datetime/date_part.rs
b/datafusion/functions/src/datetime/date_part.rs
index 0f115563c8..0f01b6a21b 100644
--- a/datafusion/functions/src/datetime/date_part.rs
+++ b/datafusion/functions/src/datetime/date_part.rs
@@ -17,7 +17,7 @@
use std::any::Any;
use std::str::FromStr;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
use arrow::array::{Array, ArrayRef, Float64Array, Int32Array};
use arrow::compute::kernels::cast_utils::IntervalUnit;
@@ -41,11 +41,42 @@ use datafusion_common::{
ExprSchema, Result, ScalarValue,
};
use datafusion_expr::{
- scalar_doc_sections::DOC_SECTION_DATETIME, ColumnarValue, Documentation,
Expr,
- ScalarUDFImpl, Signature, TypeSignature, Volatility,
+ ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature,
TypeSignature,
+ Volatility,
};
use datafusion_expr_common::signature::TypeSignatureClass;
-
+use datafusion_macros::user_doc;
+
+#[user_doc(
+ doc_section(label = "Time and Date Functions"),
+ description = "Returns the specified part of the date as an integer.",
+ syntax_example = "date_part(part, expression)",
+ alternative_syntax = "extract(field FROM source)",
+ argument(
+ name = "part",
+ description = r#"Part of the date to return. The following date parts
are supported:
+
+ - year
+ - quarter (emits value in inclusive range [1, 4] based on which quarter
of the year the date is in)
+ - month
+ - week (week of the year)
+ - day (day of the month)
+ - hour
+ - minute
+ - second
+ - millisecond
+ - microsecond
+ - nanosecond
+ - dow (day of the week)
+ - doy (day of the year)
+ - epoch (seconds since Unix epoch)
+"#
+ ),
+ argument(
+ name = "expression",
+ description = "Time expression to operate on. Can be a constant,
column, or function."
+ )
+)]
#[derive(Debug)]
pub struct DatePartFunc {
signature: Signature,
@@ -190,7 +221,7 @@ impl ScalarUDFImpl for DatePartFunc {
&self.aliases
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_date_part_doc())
+ self.doc()
}
}
@@ -206,43 +237,6 @@ fn part_normalization(part: &str) -> &str {
.unwrap_or(part)
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_date_part_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_DATETIME,
- "Returns the specified part of the date as an integer.",
- "date_part(part, expression)")
- .with_argument(
- "part",
- r#"Part of the date to return. The following date parts are
supported:
-
- - year
- - quarter (emits value in inclusive range [1, 4] based on which quartile
of the year the date is in)
- - month
- - week (week of the year)
- - day (day of the month)
- - hour
- - minute
- - second
- - millisecond
- - microsecond
- - nanosecond
- - dow (day of the week)
- - doy (day of the year)
- - epoch (seconds since Unix epoch)
-"#,
- )
- .with_argument(
- "expression",
- "Time expression to operate on. Can be a constant, column, or
function.",
- )
- .with_alternative_syntax("extract(field FROM source)")
- .build()
- })
-}
-
/// Invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the
/// result to a total number of seconds, milliseconds, microseconds or
/// nanoseconds
diff --git a/datafusion/functions/src/datetime/date_trunc.rs
b/datafusion/functions/src/datetime/date_trunc.rs
index b9f3bbf659..4780f5f5b8 100644
--- a/datafusion/functions/src/datetime/date_trunc.rs
+++ b/datafusion/functions/src/datetime/date_trunc.rs
@@ -18,7 +18,7 @@
use std::any::Any;
use std::ops::{Add, Sub};
use std::str::FromStr;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
use arrow::array::temporal_conversions::{
as_datetime_with_timezone, timestamp_ns_to_datetime,
@@ -38,12 +38,35 @@ use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
TIMEZONE_WILDCARD,
};
+use datafusion_macros::user_doc;
use chrono::{
DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset,
TimeDelta, Timelike,
};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
+#[user_doc(
+ doc_section(label = "Time and Date Functions"),
+ description = "Truncates a timestamp value to a specified precision.",
+ syntax_example = "date_trunc(precision, expression)",
+ argument(
+ name = "precision",
+ description = r#"Time precision to truncate to. The following
precisions are supported:
+
+ - year / YEAR
+ - quarter / QUARTER
+ - month / MONTH
+ - week / WEEK
+ - day / DAY
+ - hour / HOUR
+ - minute / MINUTE
+ - second / SECOND
+"#
+ ),
+ argument(
+ name = "expression",
+ description = "Time expression to operate on. Can be a constant,
column, or function."
+ )
+)]
#[derive(Debug)]
pub struct DateTruncFunc {
signature: Signature,
@@ -247,41 +270,10 @@ impl ScalarUDFImpl for DateTruncFunc {
}
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_date_trunc_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_date_trunc_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_DATETIME,
- "Truncates a timestamp value to a specified precision.",
- "date_trunc(precision, expression)",
- )
- .with_argument(
- "precision",
- r#"Time precision to truncate to. The following precisions are
supported:
-
- - year / YEAR
- - quarter / QUARTER
- - month / MONTH
- - week / WEEK
- - day / DAY
- - hour / HOUR
- - minute / MINUTE
- - second / SECOND
-"#,
- )
- .with_argument(
- "expression",
- "Time expression to operate on. Can be a constant, column, or
function.",
- )
- .build()
- })
-}
-
fn _date_trunc_coarse<T>(granularity: &str, value: Option<T>) ->
Result<Option<T>>
where
T: Datelike + Timelike + Sub<Duration, Output = T> + Copy,
diff --git a/datafusion/functions/src/datetime/from_unixtime.rs
b/datafusion/functions/src/datetime/from_unixtime.rs
index 374c744915..425da7ddac 100644
--- a/datafusion/functions/src/datetime/from_unixtime.rs
+++ b/datafusion/functions/src/datetime/from_unixtime.rs
@@ -16,18 +16,36 @@
// under the License.
use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
use arrow::datatypes::DataType;
use arrow::datatypes::DataType::{Int64, Timestamp, Utf8};
use arrow::datatypes::TimeUnit::Second;
use datafusion_common::{exec_err, internal_err, ExprSchema, Result,
ScalarValue};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
};
+use datafusion_macros::user_doc;
+#[user_doc(
+ doc_section(label = "Time and Date Functions"),
+ description = "Converts an integer to RFC3339 timestamp format
(`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are
interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`), and
the corresponding timestamp is returned.",
+ syntax_example = "from_unixtime(expression[, timezone])",
+ sql_example = r#"```sql
+> select from_unixtime(1599572549, 'America/New_York');
++-----------------------------------------------------------+
+| from_unixtime(Int64(1599572549),Utf8("America/New_York")) |
++-----------------------------------------------------------+
+| 2020-09-08T09:42:29-04:00 |
++-----------------------------------------------------------+
+```"#,
+ standard_argument(name = "expression",),
+ argument(
+ name = "timezone",
+ description = "Optional timezone to use when converting the integer to
a timestamp. If not provided, the default timezone is UTC."
+ )
+)]
#[derive(Debug)]
pub struct FromUnixtimeFunc {
signature: Signature,
@@ -125,35 +143,10 @@ impl ScalarUDFImpl for FromUnixtimeFunc {
}
fn documentation(&self) -> Option<&Documentation> {
- Some(get_from_unixtime_doc())
+ self.doc()
}
}
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_from_unixtime_doc() -> &'static Documentation {
- DOCUMENTATION.get_or_init(|| {
- Documentation::builder(
- DOC_SECTION_DATETIME,
- "Converts an integer to RFC3339 timestamp format
(`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are
interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return
the corresponding timestamp.",
- "from_unixtime(expression[, timezone])")
- .with_standard_argument("expression", None)
- .with_argument(
- "timezone",
- "Optional timezone to use when converting the integer to a
timestamp. If not provided, the default timezone is UTC.",
- )
- .with_sql_example(r#"```sql
-> select from_unixtime(1599572549, 'America/New_York');
-+-----------------------------------------------------------+
-| from_unixtime(Int64(1599572549),Utf8("America/New_York")) |
-+-----------------------------------------------------------+
-| 2020-09-08T09:42:29-04:00 |
-+-----------------------------------------------------------+
-```"#)
- .build()
- })
-}
-
#[cfg(test)]
mod test {
use crate::datetime::from_unixtime::FromUnixtimeFunc;
diff --git a/docs/source/user-guide/sql/scalar_functions.md
b/docs/source/user-guide/sql/scalar_functions.md
index 081509165e..56cc8e10fb 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -1986,21 +1986,19 @@ date_bin(interval, expression, origin-timestamp)
- **interval**: Bin interval.
- **expression**: Time expression to operate on. Can be a constant, column, or
function.
-- **origin-timestamp**: Optional. Starting point used to determine bin
boundaries. If not specified defaults 1970-01-01T00:00:00Z (the UNIX epoch in
UTC).
-
-The following intervals are supported:
-
-- nanoseconds
-- microseconds
-- milliseconds
-- seconds
-- minutes
-- hours
-- days
-- weeks
-- months
-- years
-- century
+- **origin-timestamp**: Optional. Starting point used to determine bin
boundaries. If not specified, defaults to 1970-01-01T00:00:00Z (the UNIX epoch in
UTC). The following intervals are supported:
+
+ - nanoseconds
+ - microseconds
+ - milliseconds
+ - seconds
+ - minutes
+ - hours
+ - days
+ - weeks
+ - months
+ - years
+ - century
#### Example
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]