This is an automated email from the ASF dual-hosted git repository.
kosiew pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git
The following commit(s) were added to refs/heads/main by this push:
new d322b7b7 feat: add to_time, to_local_time, to_date functions
(#1387)
d322b7b7 is described below
commit d322b7b7bfd527370f03854717661488737c9f8b
Author: Daniel Mesejo <[email protected]>
AuthorDate: Mon Mar 9 07:52:47 2026 +0100
feat: add to_time, to_local_time, to_date functions (#1387)
* feat: add to_time, to_local_time, to_date, to_char functions
Additionally, fix the conditional on formatters (since it is *args, it cannot
be None).
Refactor the name to avoid a possible collision with f.
* address comments in PR
* chore: add tests for today
---
python/datafusion/functions.py | 80 +++++++++++++++++++++++++++++++++---------
python/tests/test_functions.py | 75 ++++++++++++++++++++++++++++++++++++++-
src/functions.rs | 8 +++++
3 files changed, 146 insertions(+), 17 deletions(-)
diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index 431afcc3..9723be5c 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -42,7 +42,6 @@ except ImportError:
if TYPE_CHECKING:
from datafusion.context import SessionContext
-
__all__ = [
"abs",
"acos",
@@ -268,13 +267,18 @@ __all__ = [
"sum",
"tan",
"tanh",
+ "to_char",
+ "to_date",
"to_hex",
+ "to_local_time",
+ "to_time",
"to_timestamp",
"to_timestamp_micros",
"to_timestamp_millis",
"to_timestamp_nanos",
"to_timestamp_seconds",
"to_unixtime",
+ "today",
"translate",
"trim",
"trunc",
@@ -1010,6 +1014,56 @@ def now() -> Expr:
return Expr(f.now())
+def to_char(arg: Expr, formatter: Expr) -> Expr:
+ """Returns a string representation of a date, time, timestamp or duration.
+
+ For usage of ``formatter`` see the rust chrono package ``strftime``
package.
+
+ [Documentation
here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
+ """
+ return Expr(f.to_char(arg.expr, formatter.expr))
+
+
+def _unwrap_exprs(args: tuple[Expr, ...]) -> list:
+ return [arg.expr for arg in args]
+
+
+def to_date(arg: Expr, *formatters: Expr) -> Expr:
+ """Converts a value to a date (YYYY-MM-DD).
+
+ Supports strings, numeric and timestamp types as input.
+ Integers and doubles are interpreted as days since the unix epoch.
+ Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20')
+ if ``formatters`` are not provided.
+
+ For usage of ``formatters`` see the rust chrono package ``strftime``
package.
+
+ [Documentation
here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
+ """
+ return Expr(f.to_date(arg.expr, *_unwrap_exprs(formatters)))
+
+
+def to_local_time(*args: Expr) -> Expr:
+ """Converts a timestamp with a timezone to a timestamp without a timezone.
+
+ This function handles daylight saving time changes.
+ """
+ return Expr(f.to_local_time(*_unwrap_exprs(args)))
+
+
+def to_time(arg: Expr, *formatters: Expr) -> Expr:
+ """Converts a value to a time. Supports strings and timestamps as input.
+
+ If ``formatters`` is not provided strings are parsed as HH:MM:SS, HH:MM or
+ HH:MM:SS.nnnnnnnnn;
+
+ For usage of ``formatters`` see the rust chrono package ``strftime``
package.
+
+ [Documentation
here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
+ """
+ return Expr(f.to_time(arg.expr, *_unwrap_exprs(formatters)))
+
+
def to_timestamp(arg: Expr, *formatters: Expr) -> Expr:
"""Converts a string and optional formats to a ``Timestamp`` in
nanoseconds.
@@ -1017,11 +1071,7 @@ def to_timestamp(arg: Expr, *formatters: Expr) -> Expr:
[Documentation
here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
"""
- if formatters is None:
- return f.to_timestamp(arg.expr)
-
- formatters = [f.expr for f in formatters]
- return Expr(f.to_timestamp(arg.expr, *formatters))
+ return Expr(f.to_timestamp(arg.expr, *_unwrap_exprs(formatters)))
def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr:
@@ -1029,8 +1079,7 @@ def to_timestamp_millis(arg: Expr, *formatters: Expr) ->
Expr:
See :py:func:`to_timestamp` for a description on how to use formatters.
"""
- formatters = [f.expr for f in formatters]
- return Expr(f.to_timestamp_millis(arg.expr, *formatters))
+ return Expr(f.to_timestamp_millis(arg.expr, *_unwrap_exprs(formatters)))
def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr:
@@ -1038,8 +1087,7 @@ def to_timestamp_micros(arg: Expr, *formatters: Expr) ->
Expr:
See :py:func:`to_timestamp` for a description on how to use formatters.
"""
- formatters = [f.expr for f in formatters]
- return Expr(f.to_timestamp_micros(arg.expr, *formatters))
+ return Expr(f.to_timestamp_micros(arg.expr, *_unwrap_exprs(formatters)))
def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr:
@@ -1047,8 +1095,7 @@ def to_timestamp_nanos(arg: Expr, *formatters: Expr) ->
Expr:
See :py:func:`to_timestamp` for a description on how to use formatters.
"""
- formatters = [f.expr for f in formatters]
- return Expr(f.to_timestamp_nanos(arg.expr, *formatters))
+ return Expr(f.to_timestamp_nanos(arg.expr, *_unwrap_exprs(formatters)))
def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr:
@@ -1056,14 +1103,12 @@ def to_timestamp_seconds(arg: Expr, *formatters: Expr)
-> Expr:
See :py:func:`to_timestamp` for a description on how to use formatters.
"""
- formatters = [f.expr for f in formatters]
- return Expr(f.to_timestamp_seconds(arg.expr, *formatters))
+ return Expr(f.to_timestamp_seconds(arg.expr, *_unwrap_exprs(formatters)))
def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr:
"""Converts a string and optional formats to a Unixtime."""
- args = [f.expr for f in format_arguments]
- return Expr(f.to_unixtime(string.expr, *args))
+ return Expr(f.to_unixtime(string.expr, *_unwrap_exprs(format_arguments)))
def current_date() -> Expr:
@@ -1071,6 +1116,9 @@ def current_date() -> Expr:
return Expr(f.current_date())
+today = current_date
+
+
def current_time() -> Expr:
"""Returns current UTC time as a Time64 value."""
return Expr(f.current_time())
diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py
index 7d642b72..37d349c5 100644
--- a/python/tests/test_functions.py
+++ b/python/tests/test_functions.py
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
import math
-from datetime import datetime, timezone
+from datetime import date, datetime, time, timezone
import numpy as np
import pyarrow as pa
@@ -958,6 +958,12 @@ def test_temporal_functions(df):
f.to_timestamp_nanos(
literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d
%H:%M:%S.%f")
),
+ f.to_time(literal("12:30:45")),
+ f.to_time(literal("12-30-45"), literal("%H-%M-%S")),
+ f.to_date(literal("2017-05-31")),
+ f.to_date(literal("2017-05-31"), literal("%Y-%m-%d")),
+ f.to_local_time(column("d")),
+ f.to_char(column("d"), literal("%d-%m-%Y")),
)
result = df.collect()
assert len(result) == 1
@@ -1032,6 +1038,73 @@ def test_temporal_functions(df):
[datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3,
type=pa.timestamp("ns"),
)
+ assert result.column(17) == pa.array(
+ [time(12, 30, 45)] * 3,
+ type=pa.time64("ns"),
+ )
+ assert result.column(18) == pa.array(
+ [time(12, 30, 45)] * 3,
+ type=pa.time64("ns"),
+ )
+ assert result.column(19) == pa.array(
+ [date(2017, 5, 31)] * 3,
+ type=pa.date32(),
+ )
+ assert result.column(20) == pa.array(
+ [date(2017, 5, 31)] * 3,
+ type=pa.date32(),
+ )
+ assert result.column(21) == pa.array(
+ [
+ datetime(2022, 12, 31, tzinfo=DEFAULT_TZ),
+ datetime(2027, 6, 26, tzinfo=DEFAULT_TZ),
+ datetime(2020, 7, 2, tzinfo=DEFAULT_TZ),
+ ],
+ type=pa.timestamp("us"),
+ )
+
+ assert result.column(22) == pa.array(
+ [
+ "31-12-2022",
+ "26-06-2027",
+ "02-07-2020",
+ ],
+ type=pa.string(),
+ )
+
+
+def test_to_time_invalid_input(df):
+ with pytest.raises(Exception, match=r"Error parsing 'not-a-time' as time"):
+ df.select(f.to_time(literal("not-a-time"))).collect()
+
+
+def test_to_time_mismatched_formatter(df):
+ with pytest.raises(Exception, match=r"Error parsing '12:30:45' as time"):
+ df.select(f.to_time(literal("12:30:45"),
literal("%Y-%m-%d"))).collect()
+
+
+def test_to_date_invalid_input(df):
+ with pytest.raises(Exception, match=r"Date32"):
+ df.select(f.to_date(literal("not-a-date"))).collect()
+
+
+def test_temporal_formatter_requires_expr():
+ with pytest.raises(AttributeError, match="'str' object has no attribute
'expr'"):
+ f.to_time(literal("12:30:45"), "not-an-expr")
+
+
+def test_today_returns_date32(df):
+ result = df.select(f.today().alias("today")).collect()[0]
+ assert result.column(0).type == pa.date32()
+
+
+def test_today_alias_matches_current_date(df):
+ result = df.select(
+ f.current_date().alias("current_date"),
+ f.today().alias("today"),
+ ).collect()[0]
+
+ assert result.column(0) == result.column(1)
def test_arrow_cast(df):
diff --git a/src/functions.rs b/src/functions.rs
index 90b3a0a4..c3213405 100644
--- a/src/functions.rs
+++ b/src/functions.rs
@@ -601,6 +601,9 @@ expr_fn!(
"Converts the number to its equivalent hexadecimal representation."
);
expr_fn!(now);
+expr_fn_vec!(to_date);
+expr_fn_vec!(to_local_time);
+expr_fn_vec!(to_time);
expr_fn_vec!(to_timestamp);
expr_fn_vec!(to_timestamp_millis);
expr_fn_vec!(to_timestamp_nanos);
@@ -613,6 +616,7 @@ expr_fn!(date_part, part date);
expr_fn!(date_trunc, part date);
expr_fn!(date_bin, stride source origin);
expr_fn!(make_date, year month day);
+expr_fn!(to_char, datetime format);
expr_fn!(translate, string from to, "Replaces each character in string that
matches a character in the from set with the corresponding character in the to
set. If from is longer than to, occurrences of the extra characters in from are
deleted.");
expr_fn_vec!(
@@ -1045,6 +1049,10 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) ->
PyResult<()> {
m.add_wrapped(wrap_pyfunction!(tan))?;
m.add_wrapped(wrap_pyfunction!(tanh))?;
m.add_wrapped(wrap_pyfunction!(to_hex))?;
+ m.add_wrapped(wrap_pyfunction!(to_char))?;
+ m.add_wrapped(wrap_pyfunction!(to_date))?;
+ m.add_wrapped(wrap_pyfunction!(to_local_time))?;
+ m.add_wrapped(wrap_pyfunction!(to_time))?;
m.add_wrapped(wrap_pyfunction!(to_timestamp))?;
m.add_wrapped(wrap_pyfunction!(to_timestamp_millis))?;
m.add_wrapped(wrap_pyfunction!(to_timestamp_nanos))?;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]