This is an automated email from the ASF dual-hosted git repository.
kosiew pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git
The following commit(s) were added to refs/heads/main by this push:
new f01f30c6 Add docstring examples for Scalar temporal functions (#1424)
f01f30c6 is described below
commit f01f30c6332e40208e9f943a163a66e3d2781d08
Author: Nick <[email protected]>
AuthorDate: Wed Mar 18 01:51:06 2026 -0400
Add docstring examples for Scalar temporal functions (#1424)
* Add docstring examples for Scalar temporal functions
Add example usage to docstrings for Scalar temporal functions to improve
documentation.
Co-Authored-By: Claude Opus 4.6 <[email protected]>
* Remove examples for aliases
* Fix claude's attempt to cheat with sql
* Make examples follow google docstyle
---------
Co-authored-by: Claude Opus 4.6 <[email protected]>
---
python/datafusion/functions.py | 178 +++++++++++++++++++++++++++++++++++++++--
1 file changed, 170 insertions(+), 8 deletions(-)
diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index 765d1365..f457e2c9 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -1562,6 +1562,19 @@ def now() -> Expr:
"""Returns the current timestamp in nanoseconds.
This will use the same value for all instances of now() in same statement.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> result = df.select(
+ ... dfn.functions.now().alias("now")
+ ... )
+
+ Use .value instead of .as_py() because nanosecond timestamps
+ require pandas to convert to Python datetime objects.
+
+ >>> result.collect_column("now")[0].value > 0
+ True
"""
return Expr(f.now())
@@ -1622,6 +1635,17 @@ def to_timestamp(arg: Expr, *formatters: Expr) -> Expr:
For usage of ``formatters`` see the rust chrono package ``strftime`` package.
[Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
+ >>> result = df.select(
+ ... dfn.functions.to_timestamp(
+ ... dfn.col("a")
+ ... ).alias("ts")
+ ... )
+ >>> str(result.collect_column("ts")[0].as_py())
+ '2021-01-01 00:00:00'
"""
return Expr(f.to_timestamp(arg.expr, *_unwrap_exprs(formatters)))
@@ -1630,6 +1654,17 @@ def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr:
"""Converts a string and optional formats to a ``Timestamp`` in milliseconds.
See :py:func:`to_timestamp` for a description on how to use formatters.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
+ >>> result = df.select(
+ ... dfn.functions.to_timestamp_millis(
+ ... dfn.col("a")
+ ... ).alias("ts")
+ ... )
+ >>> str(result.collect_column("ts")[0].as_py())
+ '2021-01-01 00:00:00'
"""
return Expr(f.to_timestamp_millis(arg.expr, *_unwrap_exprs(formatters)))
@@ -1638,6 +1673,17 @@ def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr:
"""Converts a string and optional formats to a ``Timestamp`` in microseconds.
See :py:func:`to_timestamp` for a description on how to use formatters.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
+ >>> result = df.select(
+ ... dfn.functions.to_timestamp_micros(
+ ... dfn.col("a")
+ ... ).alias("ts")
+ ... )
+ >>> str(result.collect_column("ts")[0].as_py())
+ '2021-01-01 00:00:00'
"""
return Expr(f.to_timestamp_micros(arg.expr, *_unwrap_exprs(formatters)))
@@ -1646,6 +1692,17 @@ def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr:
"""Converts a string and optional formats to a ``Timestamp`` in nanoseconds.
See :py:func:`to_timestamp` for a description on how to use formatters.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
+ >>> result = df.select(
+ ... dfn.functions.to_timestamp_nanos(
+ ... dfn.col("a")
+ ... ).alias("ts")
+ ... )
+ >>> str(result.collect_column("ts")[0].as_py())
+ '2021-01-01 00:00:00'
"""
return Expr(f.to_timestamp_nanos(arg.expr, *_unwrap_exprs(formatters)))
@@ -1654,17 +1711,46 @@ def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr:
"""Converts a string and optional formats to a ``Timestamp`` in seconds.
See :py:func:`to_timestamp` for a description on how to use formatters.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
+ >>> result = df.select(
+ ... dfn.functions.to_timestamp_seconds(
+ ... dfn.col("a")
+ ... ).alias("ts")
+ ... )
+ >>> str(result.collect_column("ts")[0].as_py())
+ '2021-01-01 00:00:00'
"""
return Expr(f.to_timestamp_seconds(arg.expr, *_unwrap_exprs(formatters)))
def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr:
- """Converts a string and optional formats to a Unixtime."""
+ """Converts a string and optional formats to a Unixtime.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["1970-01-01T00:00:00"]})
+ >>> result = df.select(dfn.functions.to_unixtime(dfn.col("a")).alias("u"))
+ >>> result.collect_column("u")[0].as_py()
+ 0
+ """
return Expr(f.to_unixtime(string.expr, *_unwrap_exprs(format_arguments)))
def current_date() -> Expr:
- """Returns current UTC date as a Date32 value."""
+ """Returns current UTC date as a Date32 value.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> result = df.select(
+ ... dfn.functions.current_date().alias("d")
+ ... )
+ >>> result.collect_column("d")[0].as_py() is not None
+ True
+ """
return Expr(f.current_date())
@@ -1672,7 +1758,21 @@ today = current_date
def current_time() -> Expr:
- """Returns current UTC time as a Time64 value."""
+ """Returns current UTC time as a Time64 value.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> result = df.select(
+ ... dfn.functions.current_time().alias("t")
+ ... )
+
+ Use .value instead of .as_py() because nanosecond-resolution time values
+ require pandas to convert to Python objects.
+
+ >>> result.collect_column("t")[0].value > 0
+ True
+ """
return Expr(f.current_time())
@@ -1685,7 +1785,17 @@ def datepart(part: Expr, date: Expr) -> Expr:
def date_part(part: Expr, date: Expr) -> Expr:
- """Extracts a subfield from the date."""
+ """Extracts a subfield from the date.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["2021-07-15T00:00:00"]})
+ >>> df = df.select(dfn.functions.to_timestamp(dfn.col("a")).alias("a"))
+ >>> result = df.select(
+ ... dfn.functions.date_part(dfn.lit("year"), dfn.col("a")).alias("y"))
+ >>> result.collect_column("y")[0].as_py()
+ 2021
+ """
return Expr(f.date_part(part.expr, date.expr))
@@ -1698,7 +1808,20 @@ def extract(part: Expr, date: Expr) -> Expr:
def date_trunc(part: Expr, date: Expr) -> Expr:
- """Truncates the date to a specified level of precision."""
+ """Truncates the date to a specified level of precision.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["2021-07-15T12:34:56"]})
+ >>> df = df.select(dfn.functions.to_timestamp(dfn.col("a")).alias("a"))
+ >>> result = df.select(
+ ... dfn.functions.date_trunc(
+ ... dfn.lit("month"), dfn.col("a")
+ ... ).alias("t")
+ ... )
+ >>> str(result.collect_column("t")[0].as_py())
+ '2021-07-01 00:00:00'
+ """
return Expr(f.date_trunc(part.expr, date.expr))
@@ -1711,12 +1834,39 @@ def datetrunc(part: Expr, date: Expr) -> Expr:
def date_bin(stride: Expr, source: Expr, origin: Expr) -> Expr:
- """Coerces an arbitrary timestamp to the start of the nearest specified interval."""
+ """Coerces an arbitrary timestamp to the start of the nearest specified interval.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"timestamp": ['2021-07-15 12:34:56', '2021-01-01']})
+ >>> result = df.select(
+ ... dfn.functions.date_bin(
+ ... dfn.string_literal("15 minutes"),
+ ... dfn.col("timestamp"),
+ ... dfn.string_literal("2001-01-01 00:00:00")
+ ... ).alias("b")
+ ... )
+ >>> str(result.collect_column("b")[0].as_py())
+ '2021-07-15 12:30:00'
+ >>> str(result.collect_column("b")[1].as_py())
+ '2021-01-01 00:00:00'
+ """
return Expr(f.date_bin(stride.expr, source.expr, origin.expr))
def make_date(year: Expr, month: Expr, day: Expr) -> Expr:
- """Make a date from year, month and day component parts."""
+ """Make a date from year, month and day component parts.
+
+ Examples:
+ >>> from datetime import date
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"y": [2024], "m": [1], "d": [15]})
+ >>> result = df.select(
+ ... dfn.functions.make_date(dfn.col("y"), dfn.col("m"),
+ ... dfn.col("d")).alias("dt"))
+ >>> result.collect_column("dt")[0].as_py()
+ datetime.date(2024, 1, 15)
+ """
return Expr(f.make_date(year.expr, month.expr, day.expr))
@@ -1839,7 +1989,19 @@ def named_struct(name_pairs: list[tuple[str, Expr]]) -> Expr:
def from_unixtime(arg: Expr) -> Expr:
- """Converts an integer to RFC3339 timestamp format string."""
+ """Converts an integer to RFC3339 timestamp format string.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [0]})
+ >>> result = df.select(
+ ... dfn.functions.from_unixtime(
+ ... dfn.col("a")
+ ... ).alias("ts")
+ ... )
+ >>> str(result.collect_column("ts")[0].as_py())
+ '1970-01-01 00:00:00'
+ """
return Expr(f.from_unixtime(arg.expr))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]