This is an automated email from the ASF dual-hosted git repository.
kosiew pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git
The following commit(s) were added to refs/heads/main by this push:
new b9a958e3 Add docstring examples for Scalar math functions (#1421)
b9a958e3 is described below
commit b9a958e3893a9a208d67aac314a9ede97b370679
Author: Nick <[email protected]>
AuthorDate: Tue Mar 17 02:13:18 2026 -0400
Add docstring examples for Scalar math functions (#1421)
* Add docstring examples for Scalar math functions
Add example usage to docstrings for Scalar math functions to improve
documentation.
Co-Authored-By: Claude Opus 4.6 <[email protected]>
* Fix copy-paste error on name
* Remove example from alias
* Examples google docstyle
---------
Co-authored-by: Claude Opus 4.6 <[email protected]>
---
python/datafusion/functions.py | 228 ++++++++++++++++++++++++++++++++++++-----
1 file changed, 205 insertions(+), 23 deletions(-)
diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index 73df5664..3de2f130 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -484,10 +484,12 @@ def window(
def abs(arg: Expr) -> Expr:
"""Return the absolute value of a given number.
- Returns:
- --------
- Expr
- A new expression representing the absolute value of the input expression.
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [-1, 0, 1]})
+ >>> result = df.select(dfn.functions.abs(dfn.col("a")).alias("abs"))
+ >>> result.collect_column("abs")[0].as_py()
+ 1
"""
return Expr(f.abs(arg.expr))
@@ -600,12 +602,28 @@ def btrim(arg: Expr) -> Expr:
def cbrt(arg: Expr) -> Expr:
- """Returns the cube root of a number."""
+ """Returns the cube root of a number.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [27]})
+ >>> cbrt_df = df.select(dfn.functions.cbrt(dfn.col("a")).alias("cbrt"))
+ >>> cbrt_df.collect_column("cbrt")[0].as_py()
+ 3.0
+ """
return Expr(f.cbrt(arg.expr))
def ceil(arg: Expr) -> Expr:
- """Returns the nearest integer greater than or equal to argument."""
+ """Returns the nearest integer greater than or equal to argument.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1.9]})
+ >>> ceil_df = df.select(dfn.functions.ceil(dfn.col("a")).alias("ceil"))
+ >>> ceil_df.collect_column("ceil")[0].as_py()
+ 2.0
+ """
return Expr(f.ceil(arg.expr))
@@ -709,12 +727,30 @@ def ends_with(arg: Expr, suffix: Expr) -> Expr:
def exp(arg: Expr) -> Expr:
- """Returns the exponential of the argument."""
+ """Returns the exponential of the argument.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [0.0]})
+ >>> result = df.select(dfn.functions.exp(dfn.col("a")).alias("exp"))
+ >>> result.collect_column("exp")[0].as_py()
+ 1.0
+ """
return Expr(f.exp(arg.expr))
def factorial(arg: Expr) -> Expr:
- """Returns the factorial of the argument."""
+ """Returns the factorial of the argument.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [3]})
+ >>> result = df.select(
+ ... dfn.functions.factorial(dfn.col("a")).alias("factorial")
+ ... )
+ >>> result.collect_column("factorial")[0].as_py()
+ 6
+ """
return Expr(f.factorial(arg.expr))
@@ -730,12 +766,30 @@ def find_in_set(string: Expr, string_list: Expr) -> Expr:
def floor(arg: Expr) -> Expr:
- """Returns the nearest integer less than or equal to the argument."""
+ """Returns the nearest integer less than or equal to the argument.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1.9]})
+ >>> floor_df = df.select(dfn.functions.floor(dfn.col("a")).alias("floor"))
+ >>> floor_df.collect_column("floor")[0].as_py()
+ 1.0
+ """
return Expr(f.floor(arg.expr))
def gcd(x: Expr, y: Expr) -> Expr:
- """Returns the greatest common divisor."""
+ """Returns the greatest common divisor.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [12], "b": [8]})
+ >>> result = df.select(
+ ... dfn.functions.gcd(dfn.col("a"), dfn.col("b")).alias("gcd")
+ ... )
+ >>> result.collect_column("gcd")[0].as_py()
+ 4
+ """
return Expr(f.gcd(x.expr, y.expr))
@@ -757,12 +811,30 @@ def instr(string: Expr, substring: Expr) -> Expr:
def iszero(arg: Expr) -> Expr:
- """Returns true if a given number is +0.0 or -0.0 otherwise returns false."""
+ """Returns true if a given number is +0.0 or -0.0 otherwise returns false.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [0.0, 1.0]})
+ >>> result = df.select(dfn.functions.iszero(dfn.col("a")).alias("iz"))
+ >>> result.collect_column("iz")[0].as_py()
+ True
+ """
return Expr(f.iszero(arg.expr))
def lcm(x: Expr, y: Expr) -> Expr:
- """Returns the least common multiple."""
+ """Returns the least common multiple.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [4], "b": [6]})
+ >>> result = df.select(
+ ... dfn.functions.lcm(dfn.col("a"), dfn.col("b")).alias("lcm")
+ ... )
+ >>> result.collect_column("lcm")[0].as_py()
+ 12
+ """
return Expr(f.lcm(x.expr, y.expr))
@@ -777,22 +849,56 @@ def levenshtein(string1: Expr, string2: Expr) -> Expr:
def ln(arg: Expr) -> Expr:
- """Returns the natural logarithm (base e) of the argument."""
+ """Returns the natural logarithm (base e) of the argument.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1.0]})
+ >>> result = df.select(dfn.functions.ln(dfn.col("a")).alias("ln"))
+ >>> result.collect_column("ln")[0].as_py()
+ 0.0
+ """
return Expr(f.ln(arg.expr))
def log(base: Expr, num: Expr) -> Expr:
- """Returns the logarithm of a number for a particular ``base``."""
+ """Returns the logarithm of a number for a particular ``base``.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [100.0]})
+ >>> result = df.select(
+ ... dfn.functions.log(dfn.lit(10.0), dfn.col("a")).alias("log")
+ ... )
+ >>> result.collect_column("log")[0].as_py()
+ 2.0
+ """
return Expr(f.log(base.expr, num.expr))
def log10(arg: Expr) -> Expr:
- """Base 10 logarithm of the argument."""
+ """Base 10 logarithm of the argument.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [100.0]})
+ >>> result = df.select(dfn.functions.log10(dfn.col("a")).alias("log10"))
+ >>> result.collect_column("log10")[0].as_py()
+ 2.0
+ """
return Expr(f.log10(arg.expr))
def log2(arg: Expr) -> Expr:
- """Base 2 logarithm of the argument."""
+ """Base 2 logarithm of the argument.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [8.0]})
+ >>> result = df.select(dfn.functions.log2(dfn.col("a")).alias("log2"))
+ >>> result.collect_column("log2")[0].as_py()
+ 3.0
+ """
return Expr(f.log2(arg.expr))
@@ -831,7 +937,18 @@ def md5(arg: Expr) -> Expr:
def nanvl(x: Expr, y: Expr) -> Expr:
- """Returns ``x`` if ``x`` is not ``NaN``. Otherwise returns ``y``."""
+ """Returns ``x`` if ``x`` is not ``NaN``. Otherwise returns ``y``.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [np.nan, 1.0], "b": [0.0, 0.0]})
+ >>> nanvl_df = df.select(
+ ... dfn.functions.nanvl(dfn.col("a"), dfn.col("b")).alias("nanvl"))
+ >>> nanvl_df.collect_column("nanvl")[0].as_py()
+ 0.0
+ >>> nanvl_df.collect_column("nanvl")[1].as_py()
+ 1.0
+ """
return Expr(f.nanvl(x.expr, y.expr))
@@ -871,7 +988,20 @@ def overlay(
def pi() -> Expr:
- """Returns an approximate value of π."""
+ """Returns an approximate value of π.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> import builtins
+ >>> result = df.select(
+ ... dfn.functions.pi().alias("pi")
+ ... )
+ >>> builtins.round(
+ ... result.collect_column("pi")[0].as_py(), 5
+ ... )
+ 3.14159
+ """
return Expr(f.pi())
@@ -884,7 +1014,17 @@ def position(string: Expr, substring: Expr) -> Expr:
def power(base: Expr, exponent: Expr) -> Expr:
- """Returns ``base`` raised to the power of ``exponent``."""
+ """Returns ``base`` raised to the power of ``exponent``.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [2.0]})
+ >>> result = df.select(
+ ... dfn.functions.power(dfn.col("a"), dfn.lit(3.0)).alias("pow")
+ ... )
+ >>> result.collect_column("pow")[0].as_py()
+ 8.0
+ """
return Expr(f.power(base.expr, exponent.expr))
@@ -1081,6 +1221,13 @@ def round(value: Expr, decimal_places: Expr | None = None) -> Expr:
If the optional ``decimal_places`` is specified, round to the nearest number of
decimal places. You can specify a negative number of decimal places. For example
``round(lit(125.2345), lit(-2))`` would yield a value of ``100.0``.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1.567]})
+ >>> result = df.select(dfn.functions.round(dfn.col("a"), dfn.lit(2)).alias("r"))
+ >>> result.collect_column("r")[0].as_py()
+ 1.57
"""
if decimal_places is None:
decimal_places = Expr.literal(0)
@@ -1163,7 +1310,15 @@ def sha512(arg: Expr) -> Expr:
def signum(arg: Expr) -> Expr:
- """Returns the sign of the argument (-1, 0, +1)."""
+ """Returns the sign of the argument (-1, 0, +1).
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [-5.0, 0.0, 5.0]})
+ >>> result = df.select(dfn.functions.signum(dfn.col("a")).alias("s"))
+ >>> result.collect_column("s").to_pylist()
+ [-1.0, 0.0, 1.0]
+ """
return Expr(f.signum(arg.expr))
@@ -1203,7 +1358,15 @@ def split_part(string: Expr, delimiter: Expr, index: Expr) -> Expr:
def sqrt(arg: Expr) -> Expr:
- """Returns the square root of the argument."""
+ """Returns the square root of the argument.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [9.0]})
+ >>> result = df.select(dfn.functions.sqrt(dfn.col("a")).alias("sqrt"))
+ >>> result.collect_column("sqrt")[0].as_py()
+ 3.0
+ """
return Expr(f.sqrt(arg.expr))
@@ -1440,7 +1603,15 @@ def trim(arg: Expr) -> Expr:
def trunc(num: Expr, precision: Expr | None = None) -> Expr:
- """Truncate the number toward zero with optional precision."""
+ """Truncate the number toward zero with optional precision.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1.567]})
+ >>> result = df.select(dfn.functions.trunc(dfn.col("a")).alias("t"))
+ >>> result.collect_column("t")[0].as_py()
+ 1.0
+ """
if precision is not None:
return Expr(f.trunc(num.expr, precision.expr))
return Expr(f.trunc(num.expr))
@@ -1574,7 +1745,18 @@ def arrow_cast(expr: Expr, data_type: Expr) -> Expr:
def random() -> Expr:
- """Returns a random value in the range ``0.0 <= x < 1.0``."""
+ """Returns a random value in the range ``0.0 <= x < 1.0``.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> result = df.select(
+ ... dfn.functions.random().alias("r")
+ ... )
+ >>> val = result.collect_column("r")[0].as_py()
+ >>> 0.0 <= val < 1.0
+ True
+ """
return Expr(f.random())
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]