Copilot commented on code in PR #1484:
URL:
https://github.com/apache/datafusion-python/pull/1484#discussion_r3079368033
##########
python/datafusion/functions.py:
##########
@@ -3812,27 +3867,32 @@ def string_to_array(
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": ["hello,world"]})
>>> result = df.select(
- ... dfn.functions.string_to_array(
- ... dfn.col("a"), dfn.lit(","),
- ... ).alias("result"))
+ ... dfn.functions.string_to_array(dfn.col("a"),
",").alias("result"))
>>> result.collect_column("result")[0].as_py()
['hello', 'world']
Replace parts matching a ``null_string`` with ``NULL``:
>>> result = df.select(
... dfn.functions.string_to_array(
- ... dfn.col("a"), dfn.lit(","), null_string=dfn.lit("world"),
+ ... dfn.col("a"), ",", null_string="world",
... ).alias("result"))
>>> result.collect_column("result")[0].as_py()
['hello', None]
"""
- null_expr = null_string.expr if null_string is not None else None
- return Expr(f.string_to_array(string.expr, delimiter.expr, null_expr))
+ delimiter = coerce_to_expr(delimiter)
+ null_string = coerce_to_expr_or_none(null_string)
+ return Expr(
+ f.string_to_array(
+ string.expr,
+ delimiter.expr,
+ null_string.expr if null_string else None,
Review Comment:
`null_string` is optional (after `coerce_to_expr_or_none`), but the current
`null_string.expr if null_string else None` relies on `Expr` truthiness. Using
`null_string is not None` would better match the rest of this module’s
optional-Expr conventions and makes it unambiguous that only `None` should map
to an omitted argument.
```suggestion
null_string.expr if null_string is not None else None,
```
##########
python/datafusion/functions.py:
##########
@@ -3509,36 +3559,41 @@ def list_sort(array: Expr, descending: bool = False,
null_first: bool = False) -
def array_slice(
- array: Expr, begin: Expr, end: Expr, stride: Expr | None = None
+ array: Expr,
+ begin: Expr | int,
+ end: Expr | int,
+ stride: Expr | int | None = None,
) -> Expr:
"""Returns a slice of the array.
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [[1, 2, 3, 4]]})
>>> result = df.select(
- ... dfn.functions.array_slice(
- ... dfn.col("a"), dfn.lit(2), dfn.lit(3)
- ... ).alias("result"))
+ ... dfn.functions.array_slice(dfn.col("a"), 2, 3).alias("result"))
>>> result.collect_column("result")[0].as_py()
[2, 3]
Use ``stride`` to skip elements:
>>> result = df.select(
... dfn.functions.array_slice(
- ... dfn.col("a"), dfn.lit(1), dfn.lit(4),
- ... stride=dfn.lit(2),
+ ... dfn.col("a"), 1, 4, stride=2,
... ).alias("result"))
>>> result.collect_column("result")[0].as_py()
[1, 3]
"""
- if stride is not None:
- stride = stride.expr
- return Expr(f.array_slice(array.expr, begin.expr, end.expr, stride))
+ begin = coerce_to_expr(begin)
+ end = coerce_to_expr(end)
+ stride = coerce_to_expr_or_none(stride)
+ return Expr(
+ f.array_slice(array.expr, begin.expr, end.expr, stride.expr if stride
else None)
Review Comment:
`stride` is an optional `Expr` after `coerce_to_expr_or_none`, but this uses
`stride.expr if stride else None`. For consistency with other optional-expr
handling in this module (and to avoid relying on `Expr` truthiness), prefer an
explicit `is not None` check when deciding whether to pass `None` down to the
Rust binding.
```suggestion
f.array_slice(
array.expr,
begin.expr,
end.expr,
stride.expr if stride is not None else None,
)
```
##########
python/datafusion/functions.py:
##########
@@ -363,49 +365,52 @@ def nullif(expr1: Expr, expr2: Expr) -> Expr:
return Expr(f.nullif(expr1.expr, expr2.expr))
-def encode(expr: Expr, encoding: Expr) -> Expr:
+def encode(expr: Expr, encoding: Expr | str) -> Expr:
"""Encode the ``input``, using the ``encoding``. encoding can be base64 or
hex.
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": ["hello"]})
>>> result = df.select(
- ... dfn.functions.encode(dfn.col("a"),
dfn.lit("base64")).alias("enc"))
+ ... dfn.functions.encode(dfn.col("a"), "base64").alias("enc"))
>>> result.collect_column("enc")[0].as_py()
'aGVsbG8'
"""
+ encoding = coerce_to_expr(encoding)
return Expr(f.encode(expr.expr, encoding.expr))
Review Comment:
Many functions in this module were updated to accept native Python literals
(via `coerce_to_expr*`). There are existing expression-shape tests in
`python/tests/test_functions.py` for these wrappers, but they currently appear
to exercise only the `lit(...)` call style. Adding a few representative
assertions using the new pythonic calling conventions (e.g., `encode(col("a"),
"base64")`, `split_part(col("a"), ",", 2)`, `regexp_count(col("a"), "abc",
start=4, flags="i")`) would help prevent regressions.
##########
python/datafusion/functions.py:
##########
@@ -1461,19 +1478,20 @@ def regexp_like(string: Expr, regex: Expr, flags: Expr
| None = None) -> Expr:
>>> result = df.select(
... dfn.functions.regexp_like(
- ... dfn.col("a"), dfn.lit("HELLO"),
- ... flags=dfn.lit("i"),
+ ... dfn.col("a"), "HELLO", flags="i",
... ).alias("m")
... )
>>> result.collect_column("m")[0].as_py()
True
"""
- if flags is not None:
- flags = flags.expr
- return Expr(f.regexp_like(string.expr, regex.expr, flags))
+ regex = coerce_to_expr(regex)
+ flags = coerce_to_expr_or_none(flags)
+ return Expr(f.regexp_like(string.expr, regex.expr, flags.expr if flags
else None))
Review Comment:
In the optional-argument handling here, `flags.expr if flags else None`
relies on the truthiness of an `Expr` instance. Elsewhere in this file optional
`Expr` values are typically checked with `is not None` (e.g., `regexp_instr`).
Using explicit `is not None` would be clearer/consistent and avoids any future
issues if `Expr` ever defines `__bool__`/truthiness semantics. The same pattern
appears in the other `regexp_*` helpers nearby.
##########
python/datafusion/expr.py:
##########
@@ -277,6 +283,41 @@ def _iter(
return list(_iter(exprs))
+def coerce_to_expr(value: Any) -> Expr:
+ """Coerce a native Python value to an ``Expr`` literal, passing ``Expr``
through.
+
+ This is the complement of :func:`ensure_expr`: where ``ensure_expr``
+ *rejects* non-``Expr`` values, ``coerce_to_expr`` *wraps* them via
+ :meth:`Expr.literal` so that functions can accept native Python types
+ (``int``, ``float``, ``str``, ``bool``, etc.) alongside ``Expr``.
+
+ Args:
+ value: An ``Expr`` instance (returned as-is) or a Python literal to
wrap.
+
+ Returns:
+ An ``Expr`` representing the value.
+ """
+ if isinstance(value, Expr):
+ return value
+ return Expr.literal(value)
+
+
+def coerce_to_expr_or_none(value: Any | None) -> Expr | None:
+ """Coerce a value to ``Expr`` or pass ``None`` through unchanged.
+
+ Same as :func:`coerce_to_expr` but accepts ``None`` for optional
parameters.
+
+ Args:
+ value: An ``Expr`` instance, a Python literal to wrap, or ``None``.
+
+ Returns:
+ An ``Expr`` representing the value, or ``None``.
+ """
+ if value is None:
+ return None
+ return coerce_to_expr(value)
+
Review Comment:
New coercion helpers are introduced here and a large number of public
functions now depend on them to accept native Python literals. Please add unit
tests to lock in the intended behavior (e.g., `coerce_to_expr(1)` returns an
`Expr` literal, `coerce_to_expr` passes through an existing `Expr`, and
`coerce_to_expr_or_none(None)` returns `None`). This can likely live alongside
the existing `ensure_expr` tests in `python/tests/test_expr.py`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]