This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new a203c2b167 doc-gen: migrate scalar functions (datetime) documentation 
1/2 (#13920)
a203c2b167 is described below

commit a203c2b167123eba4a2f36df4f954a6fecf536a5
Author: Ian Lai <[email protected]>
AuthorDate: Sun Dec 29 21:11:35 2024 +0800

    doc-gen: migrate scalar functions (datetime) documentation 1/2 (#13920)
    
    * doc-gen: migrate scalar functions (datetime) documentation 1/2
    
    * fix: fix typo and update function docs
    
    ---------
    
    Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
---
 datafusion/functions/src/datetime/current_date.rs  |  30 ++----
 datafusion/functions/src/datetime/current_time.rs  |  30 ++----
 datafusion/functions/src/datetime/date_bin.rs      | 118 ++++++++++-----------
 datafusion/functions/src/datetime/date_part.rs     |  78 +++++++-------
 datafusion/functions/src/datetime/date_trunc.rs    |  60 +++++------
 datafusion/functions/src/datetime/from_unixtime.rs |  49 ++++-----
 docs/source/user-guide/sql/scalar_functions.md     |  28 +++--
 7 files changed, 175 insertions(+), 218 deletions(-)

diff --git a/datafusion/functions/src/datetime/current_date.rs 
b/datafusion/functions/src/datetime/current_date.rs
index 97d97939d3..868cbe23d6 100644
--- a/datafusion/functions/src/datetime/current_date.rs
+++ b/datafusion/functions/src/datetime/current_date.rs
@@ -22,13 +22,21 @@ use arrow::datatypes::DataType::Date32;
 use chrono::{Datelike, NaiveDate};
 
 use datafusion_common::{internal_err, Result, ScalarValue};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
 use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
 use datafusion_expr::{
     ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
 };
-use std::sync::OnceLock;
+use datafusion_macros::user_doc;
 
+#[user_doc(
+    doc_section(label = "Time and Date Functions"),
+    description = r#"
+Returns the current UTC date.
+
+The `current_date()` return value is determined at query time and will return 
the same date, no matter when in the query plan the function executes.
+"#,
+    syntax_example = "current_date()"
+)]
 #[derive(Debug)]
 pub struct CurrentDateFunc {
     signature: Signature,
@@ -105,22 +113,6 @@ impl ScalarUDFImpl for CurrentDateFunc {
     }
 
     fn documentation(&self) -> Option<&Documentation> {
-        Some(get_current_date_doc())
+        self.doc()
     }
 }
-
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_current_date_doc() -> &'static Documentation {
-    DOCUMENTATION.get_or_init(|| {
-        Documentation::builder(
-            DOC_SECTION_DATETIME,
-            r#"
-Returns the current UTC date.
-
-The `current_date()` return value is determined at query time and will return 
the same date, no matter when in the query plan the function executes.
-"#,
-            "current_date()")
-            .build()
-    })
-}
diff --git a/datafusion/functions/src/datetime/current_time.rs 
b/datafusion/functions/src/datetime/current_time.rs
index 1cd39e5777..142184508e 100644
--- a/datafusion/functions/src/datetime/current_time.rs
+++ b/datafusion/functions/src/datetime/current_time.rs
@@ -19,15 +19,23 @@ use arrow::datatypes::DataType;
 use arrow::datatypes::DataType::Time64;
 use arrow::datatypes::TimeUnit::Nanosecond;
 use std::any::Any;
-use std::sync::OnceLock;
 
 use datafusion_common::{internal_err, Result, ScalarValue};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
 use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
 use datafusion_expr::{
     ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
 };
+use datafusion_macros::user_doc;
 
+#[user_doc(
+    doc_section(label = "Time and Date Functions"),
+    description = r#"
+Returns the current UTC time.
+
+The `current_time()` return value is determined at query time and will return 
the same time, no matter when in the query plan the function executes.
+"#,
+    syntax_example = "current_time()"
+)]
 #[derive(Debug)]
 pub struct CurrentTimeFunc {
     signature: Signature,
@@ -93,22 +101,6 @@ impl ScalarUDFImpl for CurrentTimeFunc {
     }
 
     fn documentation(&self) -> Option<&Documentation> {
-        Some(get_current_time_doc())
+        self.doc()
     }
 }
-
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_current_time_doc() -> &'static Documentation {
-    DOCUMENTATION.get_or_init(|| {
-        Documentation::builder(
-            DOC_SECTION_DATETIME,
-            r#"
-Returns the current UTC time.
-
-The `current_time()` return value is determined at query time and will return 
the same time, no matter when in the query plan the function executes.
-"#,
-            "current_time()")
-            .build()
-    })
-}
diff --git a/datafusion/functions/src/datetime/date_bin.rs 
b/datafusion/functions/src/datetime/date_bin.rs
index bb3f2177b9..a288693699 100644
--- a/datafusion/functions/src/datetime/date_bin.rs
+++ b/datafusion/functions/src/datetime/date_bin.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
 
 use arrow::array::temporal_conversions::NANOSECONDS;
 use arrow::array::types::{
@@ -37,10 +37,64 @@ use datafusion_expr::TypeSignature::Exact;
 use datafusion_expr::{
     ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, 
TIMEZONE_WILDCARD,
 };
+use datafusion_macros::user_doc;
 
 use chrono::{DateTime, Datelike, Duration, Months, TimeDelta, Utc};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
 
+#[user_doc(
+    doc_section(label = "Time and Date Functions"),
+    description = r#"
+Calculates time intervals and returns the start of the interval nearest to the 
specified timestamp. Use `date_bin` to downsample time series data by grouping 
rows into time-based "bins" or "windows" and applying an aggregate or selector 
function to each window.
+
+For example, if you "bin" or "window" data into 15 minute intervals, an input 
timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 
minute bin it is in: `2023-01-01T18:15:00Z`.
+"#,
+    syntax_example = "date_bin(interval, expression, origin-timestamp)",
+    sql_example = r#"```sql
+-- Bin the timestamp into 1 day intervals
+> SELECT date_bin(interval '1 day', time) as bin
+FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z')  t(time);
++---------------------+
+| bin                 |
++---------------------+
+| 2023-01-01T00:00:00 |
+| 2023-01-03T00:00:00 |
++---------------------+
+2 row(s) fetched.
+
+-- Bin the timestamp into 1 day intervals starting at 3AM on  2023-01-01
+> SELECT date_bin(interval '1 day', time,  '2023-01-01T03:00:00') as bin
+FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z')  t(time);
++---------------------+
+| bin                 |
++---------------------+
+| 2023-01-01T03:00:00 |
+| 2023-01-03T03:00:00 |
++---------------------+
+2 row(s) fetched.
+```"#,
+    argument(name = "interval", description = "Bin interval."),
+    argument(
+        name = "expression",
+        description = "Time expression to operate on. Can be a constant, 
column, or function."
+    ),
+    argument(
+        name = "origin-timestamp",
+        description = r#"Optional. Starting point used to determine bin 
boundaries. If not specified, defaults to 1970-01-01T00:00:00Z (the UNIX epoch in 
UTC). The following intervals are supported:
+
+    - nanoseconds
+    - microseconds
+    - milliseconds
+    - seconds
+    - minutes
+    - hours
+    - days
+    - weeks
+    - months
+    - years
+    - century
+"#
+    )
+)]
 #[derive(Debug)]
 pub struct DateBinFunc {
     signature: Signature,
@@ -169,68 +223,10 @@ impl ScalarUDFImpl for DateBinFunc {
         }
     }
     fn documentation(&self) -> Option<&Documentation> {
-        Some(get_date_bin_doc())
+        self.doc()
     }
 }
 
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_date_bin_doc() -> &'static Documentation {
-    DOCUMENTATION.get_or_init(|| {
-        Documentation::builder(
-            DOC_SECTION_DATETIME,
-            r#"
-Calculates time intervals and returns the start of the interval nearest to the 
specified timestamp. Use `date_bin` to downsample time series data by grouping 
rows into time-based "bins" or "windows" and applying an aggregate or selector 
function to each window.
-
-For example, if you "bin" or "window" data into 15 minute intervals, an input 
timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 
minute bin it is in: `2023-01-01T18:15:00Z`.
-"#,
-            "date_bin(interval, expression, origin-timestamp)")
-            .with_sql_example(r#"```sql
--- Bin the timestamp into 1 day intervals
-> SELECT date_bin(interval '1 day', time) as bin
-FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z')  t(time);
-+---------------------+
-| bin                 |
-+---------------------+
-| 2023-01-01T00:00:00 |
-| 2023-01-03T00:00:00 |
-+---------------------+
-2 row(s) fetched.
-
--- Bin the timestamp into 1 day intervals starting at 3AM on  2023-01-01
-> SELECT date_bin(interval '1 day', time,  '2023-01-01T03:00:00') as bin
-FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z')  t(time);
-+---------------------+
-| bin                 |
-+---------------------+
-| 2023-01-01T03:00:00 |
-| 2023-01-03T03:00:00 |
-+---------------------+
-2 row(s) fetched.
-```
-"#)
-            .with_argument("interval", "Bin interval.")
-            .with_argument("expression", "Time expression to operate on. Can 
be a constant, column, or function.")
-            .with_argument("origin-timestamp", "Optional. Starting point used 
to determine bin boundaries. If not specified defaults 1970-01-01T00:00:00Z 
(the UNIX epoch in UTC).
-
-The following intervals are supported:
-
-- nanoseconds
-- microseconds
-- milliseconds
-- seconds
-- minutes
-- hours
-- days
-- weeks
-- months
-- years
-- century
-")
-            .build()
-    })
-}
-
 enum Interval {
     Nanoseconds(i64),
     Months(i64),
diff --git a/datafusion/functions/src/datetime/date_part.rs 
b/datafusion/functions/src/datetime/date_part.rs
index 0f115563c8..0f01b6a21b 100644
--- a/datafusion/functions/src/datetime/date_part.rs
+++ b/datafusion/functions/src/datetime/date_part.rs
@@ -17,7 +17,7 @@
 
 use std::any::Any;
 use std::str::FromStr;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
 
 use arrow::array::{Array, ArrayRef, Float64Array, Int32Array};
 use arrow::compute::kernels::cast_utils::IntervalUnit;
@@ -41,11 +41,42 @@ use datafusion_common::{
     ExprSchema, Result, ScalarValue,
 };
 use datafusion_expr::{
-    scalar_doc_sections::DOC_SECTION_DATETIME, ColumnarValue, Documentation, 
Expr,
-    ScalarUDFImpl, Signature, TypeSignature, Volatility,
+    ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, 
TypeSignature,
+    Volatility,
 };
 use datafusion_expr_common::signature::TypeSignatureClass;
-
+use datafusion_macros::user_doc;
+
+#[user_doc(
+    doc_section(label = "Time and Date Functions"),
+    description = "Returns the specified part of the date as an integer.",
+    syntax_example = "date_part(part, expression)",
+    alternative_syntax = "extract(field FROM source)",
+    argument(
+        name = "part",
+        description = r#"Part of the date to return. The following date parts 
are supported:
+        
+    - year
+    - quarter (emits value in inclusive range [1, 4] based on which quarter 
of the year the date is in)
+    - month
+    - week (week of the year)
+    - day (day of the month)
+    - hour
+    - minute
+    - second
+    - millisecond
+    - microsecond
+    - nanosecond
+    - dow (day of the week)
+    - doy (day of the year)
+    - epoch (seconds since Unix epoch)
+"#
+    ),
+    argument(
+        name = "expression",
+        description = "Time expression to operate on. Can be a constant, 
column, or function."
+    )
+)]
 #[derive(Debug)]
 pub struct DatePartFunc {
     signature: Signature,
@@ -190,7 +221,7 @@ impl ScalarUDFImpl for DatePartFunc {
         &self.aliases
     }
     fn documentation(&self) -> Option<&Documentation> {
-        Some(get_date_part_doc())
+        self.doc()
     }
 }
 
@@ -206,43 +237,6 @@ fn part_normalization(part: &str) -> &str {
         .unwrap_or(part)
 }
 
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_date_part_doc() -> &'static Documentation {
-    DOCUMENTATION.get_or_init(|| {
-        Documentation::builder(
-            DOC_SECTION_DATETIME,
-            "Returns the specified part of the date as an integer.",
-            "date_part(part, expression)")
-            .with_argument(
-                "part",
-                r#"Part of the date to return. The following date parts are 
supported:
-
-    - year
-    - quarter (emits value in inclusive range [1, 4] based on which quartile 
of the year the date is in)
-    - month
-    - week (week of the year)
-    - day (day of the month)
-    - hour
-    - minute
-    - second
-    - millisecond
-    - microsecond
-    - nanosecond
-    - dow (day of the week)
-    - doy (day of the year)
-    - epoch (seconds since Unix epoch)
-"#,
-            )
-            .with_argument(
-                "expression",
-                "Time expression to operate on. Can be a constant, column, or 
function.",
-            )
-            .with_alternative_syntax("extract(field FROM source)")
-            .build()
-    })
-}
-
 /// Invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the
 /// result to a total number of seconds, milliseconds, microseconds or
 /// nanoseconds
diff --git a/datafusion/functions/src/datetime/date_trunc.rs 
b/datafusion/functions/src/datetime/date_trunc.rs
index b9f3bbf659..4780f5f5b8 100644
--- a/datafusion/functions/src/datetime/date_trunc.rs
+++ b/datafusion/functions/src/datetime/date_trunc.rs
@@ -18,7 +18,7 @@
 use std::any::Any;
 use std::ops::{Add, Sub};
 use std::str::FromStr;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
 
 use arrow::array::temporal_conversions::{
     as_datetime_with_timezone, timestamp_ns_to_datetime,
@@ -38,12 +38,35 @@ use datafusion_expr::TypeSignature::Exact;
 use datafusion_expr::{
     ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, 
TIMEZONE_WILDCARD,
 };
+use datafusion_macros::user_doc;
 
 use chrono::{
     DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, 
TimeDelta, Timelike,
 };
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
 
+#[user_doc(
+    doc_section(label = "Time and Date Functions"),
+    description = "Truncates a timestamp value to a specified precision.",
+    syntax_example = "date_trunc(precision, expression)",
+    argument(
+        name = "precision",
+        description = r#"Time precision to truncate to. The following 
precisions are supported:
+
+    - year / YEAR
+    - quarter / QUARTER
+    - month / MONTH
+    - week / WEEK
+    - day / DAY
+    - hour / HOUR
+    - minute / MINUTE
+    - second / SECOND
+"#
+    ),
+    argument(
+        name = "expression",
+        description = "Time expression to operate on. Can be a constant, 
column, or function."
+    )
+)]
 #[derive(Debug)]
 pub struct DateTruncFunc {
     signature: Signature,
@@ -247,41 +270,10 @@ impl ScalarUDFImpl for DateTruncFunc {
         }
     }
     fn documentation(&self) -> Option<&Documentation> {
-        Some(get_date_trunc_doc())
+        self.doc()
     }
 }
 
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_date_trunc_doc() -> &'static Documentation {
-    DOCUMENTATION.get_or_init(|| {
-        Documentation::builder(
-            DOC_SECTION_DATETIME,
-            "Truncates a timestamp value to a specified precision.",
-            "date_trunc(precision, expression)",
-        )
-        .with_argument(
-            "precision",
-            r#"Time precision to truncate to. The following precisions are 
supported:
-
-    - year / YEAR
-    - quarter / QUARTER
-    - month / MONTH
-    - week / WEEK
-    - day / DAY
-    - hour / HOUR
-    - minute / MINUTE
-    - second / SECOND
-"#,
-        )
-        .with_argument(
-            "expression",
-            "Time expression to operate on. Can be a constant, column, or 
function.",
-        )
-        .build()
-    })
-}
-
 fn _date_trunc_coarse<T>(granularity: &str, value: Option<T>) -> 
Result<Option<T>>
 where
     T: Datelike + Timelike + Sub<Duration, Output = T> + Copy,
diff --git a/datafusion/functions/src/datetime/from_unixtime.rs 
b/datafusion/functions/src/datetime/from_unixtime.rs
index 374c744915..425da7ddac 100644
--- a/datafusion/functions/src/datetime/from_unixtime.rs
+++ b/datafusion/functions/src/datetime/from_unixtime.rs
@@ -16,18 +16,36 @@
 // under the License.
 
 use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::Arc;
 
 use arrow::datatypes::DataType;
 use arrow::datatypes::DataType::{Int64, Timestamp, Utf8};
 use arrow::datatypes::TimeUnit::Second;
 use datafusion_common::{exec_err, internal_err, ExprSchema, Result, 
ScalarValue};
-use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
 use datafusion_expr::TypeSignature::Exact;
 use datafusion_expr::{
     ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
 };
+use datafusion_macros::user_doc;
 
+#[user_doc(
+    doc_section(label = "Time and Date Functions"),
+    description = "Converts an integer to RFC3339 timestamp format 
(`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are 
interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`), and 
the corresponding timestamp is returned.",
+    syntax_example = "from_unixtime(expression[, timezone])",
+    sql_example = r#"```sql
+> select from_unixtime(1599572549, 'America/New_York');
++-----------------------------------------------------------+
+| from_unixtime(Int64(1599572549),Utf8("America/New_York")) |
++-----------------------------------------------------------+
+| 2020-09-08T09:42:29-04:00                                 |
++-----------------------------------------------------------+
+```"#,
+    standard_argument(name = "expression",),
+    argument(
+        name = "timezone",
+        description = "Optional timezone to use when converting the integer to 
a timestamp. If not provided, the default timezone is UTC."
+    )
+)]
 #[derive(Debug)]
 pub struct FromUnixtimeFunc {
     signature: Signature,
@@ -125,35 +143,10 @@ impl ScalarUDFImpl for FromUnixtimeFunc {
     }
 
     fn documentation(&self) -> Option<&Documentation> {
-        Some(get_from_unixtime_doc())
+        self.doc()
     }
 }
 
-static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
-
-fn get_from_unixtime_doc() -> &'static Documentation {
-    DOCUMENTATION.get_or_init(|| {
-        Documentation::builder(
-            DOC_SECTION_DATETIME,
-            "Converts an integer to RFC3339 timestamp format 
(`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are 
interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return 
the corresponding timestamp.",
-            "from_unixtime(expression[, timezone])")
-            .with_standard_argument("expression", None)
-            .with_argument(
-                "timezone",
-                "Optional timezone to use when converting the integer to a 
timestamp. If not provided, the default timezone is UTC.",
-            )
-            .with_sql_example(r#"```sql
-> select from_unixtime(1599572549, 'America/New_York');
-+-----------------------------------------------------------+
-| from_unixtime(Int64(1599572549),Utf8("America/New_York")) |
-+-----------------------------------------------------------+
-| 2020-09-08T09:42:29-04:00                                 |
-+-----------------------------------------------------------+
-```"#)
-            .build()
-    })
-}
-
 #[cfg(test)]
 mod test {
     use crate::datetime::from_unixtime::FromUnixtimeFunc;
diff --git a/docs/source/user-guide/sql/scalar_functions.md 
b/docs/source/user-guide/sql/scalar_functions.md
index 081509165e..56cc8e10fb 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -1986,21 +1986,19 @@ date_bin(interval, expression, origin-timestamp)
 
 - **interval**: Bin interval.
 - **expression**: Time expression to operate on. Can be a constant, column, or 
function.
-- **origin-timestamp**: Optional. Starting point used to determine bin 
boundaries. If not specified defaults 1970-01-01T00:00:00Z (the UNIX epoch in 
UTC).
-
-The following intervals are supported:
-
-- nanoseconds
-- microseconds
-- milliseconds
-- seconds
-- minutes
-- hours
-- days
-- weeks
-- months
-- years
-- century
+- **origin-timestamp**: Optional. Starting point used to determine bin 
boundaries. If not specified, defaults to 1970-01-01T00:00:00Z (the UNIX epoch in 
UTC). The following intervals are supported:
+
+  - nanoseconds
+  - microseconds
+  - milliseconds
+  - seconds
+  - minutes
+  - hours
+  - days
+  - weeks
+  - months
+  - years
+  - century
 
 #### Example
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to