(datafusion-python) branch main updated: Add docstring examples for Scalar string functions (#1423)

kosiew Tue, 17 Mar 2026 23:01:59 -0700

This is an automated email from the ASF dual-hosted git repository.

kosiew pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git



The following commit(s) were added to refs/heads/main by this push:
     new 4e51fa89 Add docstring examples for Scalar string functions (#1423)
4e51fa89 is described below

commit 4e51fa8935799343c973e9cd306f42d278620d42
Author: Nick <[email protected]>
AuthorDate: Wed Mar 18 02:01:44 2026 -0400

    Add docstring examples for Scalar string functions (#1423)
    
    * Add docstring examples for Scalar string functions
    
    Add example usage to docstrings for Scalar string functions to improve 
documentation.
    
    Co-Authored-By: Claude Opus 4.6 <[email protected]>
    
    * Remove examples for aliases
    
    ---------
    
    Co-authored-by: Claude Opus 4.6 <[email protected]>
---
 python/datafusion/functions.py | 361 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 335 insertions(+), 26 deletions(-)

diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index fbca979c..a4933a74 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -649,7 +649,16 @@ def acosh(arg: Expr) -> Expr:
 
 
 def ascii(arg: Expr) -> Expr:
-    """Returns the numeric code of the first character of the argument."""
+    """Returns the numeric code of the first character of the argument.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["a","b","c"]})
+    >>> ascii_df = df.select(dfn.functions.ascii(dfn.col("a")).alias("ascii"))
+    >>> ascii_df.collect_column("ascii")[0].as_py()
+    97
+    """
     return Expr(f.ascii(arg.expr))
 
 
@@ -720,12 +729,30 @@ def atan2(y: Expr, x: Expr) -> Expr:
 
 
 def bit_length(arg: Expr) -> Expr:
-    """Returns the number of bits in the string argument."""
+    """Returns the number of bits in the string argument.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["a","b","c"]})
+    >>> bit_df = 
df.select(dfn.functions.bit_length(dfn.col("a")).alias("bit_len"))
+    >>> bit_df.collect_column("bit_len")[0].as_py()
+    8
+    """
     return Expr(f.bit_length(arg.expr))
 
 
 def btrim(arg: Expr) -> Expr:
-    """Removes all characters, spaces by default, from both sides of a 
string."""
+    """Removes all characters, spaces by default, from both sides of a string.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": [" a  "]})
+    >>> trim_df = df.select(dfn.functions.btrim(dfn.col("a")).alias("trimmed"))
+    >>> trim_df.collect_column("trimmed")[0].as_py()
+    'a'
+    """
     return Expr(f.btrim(arg.expr))
 
 
@@ -756,22 +783,59 @@ def ceil(arg: Expr) -> Expr:
 
 
 def character_length(arg: Expr) -> Expr:
-    """Returns the number of characters in the argument."""
+    """Returns the number of characters in the argument.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["abc","b","c"]})
+    >>> char_len_df = df.select(
+    ...     dfn.functions.character_length(dfn.col("a")).alias("char_len"))
+    >>> char_len_df.collect_column("char_len")[0].as_py()
+    3
+    """
     return Expr(f.character_length(arg.expr))
 
 
 def length(string: Expr) -> Expr:
-    """The number of characters in the ``string``."""
+    """The number of characters in the ``string``.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["hello"]})
+    >>> result = df.select(dfn.functions.length(dfn.col("a")).alias("len"))
+    >>> result.collect_column("len")[0].as_py()
+    5
+    """
     return Expr(f.length(string.expr))
 
 
 def char_length(string: Expr) -> Expr:
-    """The number of characters in the ``string``."""
+    """The number of characters in the ``string``.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["hello"]})
+    >>> result = 
df.select(dfn.functions.char_length(dfn.col("a")).alias("len"))
+    >>> result.collect_column("len")[0].as_py()
+    5
+    """
     return Expr(f.char_length(string.expr))
 
 
 def chr(arg: Expr) -> Expr:
-    """Converts the Unicode code point to a UTF8 character."""
+    """Converts the Unicode code point to a UTF8 character.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": [65]})
+    >>> result = df.select(dfn.functions.chr(dfn.col("a")).alias("chr"))
+    >>> result.collect_column("chr")[0].as_py()
+    'A'
+    """
     return Expr(f.chr(arg.expr))
 
 
@@ -847,7 +911,17 @@ def degrees(arg: Expr) -> Expr:
 
 
 def ends_with(arg: Expr, suffix: Expr) -> Expr:
-    """Returns true if the ``string`` ends with the ``suffix``, false 
otherwise."""
+    """Returns true if the ``string`` ends with the ``suffix``, false 
otherwise.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["abc","b","c"]})
+    >>> ends_with_df = df.select(
+    ...     dfn.functions.ends_with(dfn.col("a"), 
dfn.lit("c")).alias("ends_with"))
+    >>> ends_with_df.collect_column("ends_with")[0].as_py()
+    True
+    """
     return Expr(f.ends_with(arg.expr, suffix.expr))
 
 
@@ -886,6 +960,15 @@ def find_in_set(string: Expr, string_list: Expr) -> Expr:
     ``string_list`` consisting of N substrings.
 
     The string list is a string composed of substrings separated by ``,`` 
characters.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["b"]})
+    >>> result = df.select(
+    ...     dfn.functions.find_in_set(dfn.col("a"), 
dfn.lit("a,b,c")).alias("pos"))
+    >>> result.collect_column("pos")[0].as_py()
+    2
     """
     return Expr(f.find_in_set(string.expr, string_list.expr))
 
@@ -923,6 +1006,14 @@ def initcap(string: Expr) -> Expr:
 
     Converts the first letter of each word in ``string`` to uppercase and the 
remaining
     characters to lowercase.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["the cat"]})
+    >>> cap_df = df.select(dfn.functions.initcap(dfn.col("a")).alias("cap"))
+    >>> cap_df.collect_column("cap")[0].as_py()
+    'The Cat'
     """
     return Expr(f.initcap(string.expr))
 
@@ -964,12 +1055,31 @@ def lcm(x: Expr, y: Expr) -> Expr:
 
 
 def left(string: Expr, n: Expr) -> Expr:
-    """Returns the first ``n`` characters in the ``string``."""
+    """Returns the first ``n`` characters in the ``string``.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["the cat"]})
+    >>> left_df = df.select(dfn.functions.left(dfn.col("a"), 
dfn.lit(3)).alias("left"))
+    >>> left_df.collect_column("left")[0].as_py()
+    'the'
+    """
     return Expr(f.left(string.expr, n.expr))
 
 
 def levenshtein(string1: Expr, string2: Expr) -> Expr:
-    """Returns the Levenshtein distance between the two given strings."""
+    """Returns the Levenshtein distance between the two given strings.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["kitten"]})
+    >>> result = df.select(
+    ...     dfn.functions.levenshtein(dfn.col("a"), 
dfn.lit("sitting")).alias("d"))
+    >>> result.collect_column("d")[0].as_py()
+    3
+    """
     return Expr(f.levenshtein(string1.expr, string2.expr))
 
 
@@ -1028,7 +1138,16 @@ def log2(arg: Expr) -> Expr:
 
 
 def lower(arg: Expr) -> Expr:
-    """Converts a string to lowercase."""
+    """Converts a string to lowercase.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["THE CaT"]})
+    >>> lower_df = df.select(dfn.functions.lower(dfn.col("a")).alias("lower"))
+    >>> lower_df.collect_column("lower")[0].as_py()
+    'the cat'
+    """
     return Expr(f.lower(arg.expr))
 
 
@@ -1038,13 +1157,32 @@ def lpad(string: Expr, count: Expr, characters: Expr | 
None = None) -> Expr:
     Extends the string to length length by prepending the characters fill (a
     space by default). If the string is already longer than length then it is
     truncated (on the right).
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["the cat", "a hat"]})
+    >>> lpad_df = df.select(dfn.functions.lpad(dfn.col("a"), 
dfn.lit(6)).alias("lpad"))
+    >>> lpad_df.collect_column("lpad")[0].as_py()
+    'the ca'
+    >>> lpad_df.collect_column("lpad")[1].as_py()
+    ' a hat'
     """
     characters = characters if characters is not None else Expr.literal(" ")
     return Expr(f.lpad(string.expr, count.expr, characters.expr))
 
 
 def ltrim(arg: Expr) -> Expr:
-    """Removes all characters, spaces by default, from the beginning of a 
string."""
+    """Removes all characters, spaces by default, from the beginning of a 
string.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": [" a  "]})
+    >>> trim_df = df.select(dfn.functions.ltrim(dfn.col("a")).alias("trimmed"))
+    >>> trim_df.collect_column("trimmed")[0].as_py()
+    'a  '
+    """
     return Expr(f.ltrim(arg.expr))
 
 
@@ -1095,7 +1233,16 @@ def nvl(x: Expr, y: Expr) -> Expr:
 
 
 def octet_length(arg: Expr) -> Expr:
-    """Returns the number of bytes of a string."""
+    """Returns the number of bytes of a string.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["hello"]})
+    >>> result = 
df.select(dfn.functions.octet_length(dfn.col("a")).alias("len"))
+    >>> result.collect_column("len")[0].as_py()
+    5
+    """
     return Expr(f.octet_length(arg.expr))
 
 
@@ -1106,6 +1253,16 @@ def overlay(
 
     Replace the substring of string that starts at the ``start``'th character 
and
     extends for ``length`` characters with new substring.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["abcdef"]})
+    >>> result = df.select(
+    ...     dfn.functions.overlay(dfn.col("a"), dfn.lit("XY"), dfn.lit(3),
+    ...     dfn.lit(2)).alias("o"))
+    >>> result.collect_column("o")[0].as_py()
+    'abXYef'
     """
     if length is None:
         return Expr(f.overlay(string.expr, substring.expr, start.expr))
@@ -1318,22 +1475,60 @@ def regexp_instr(
 
 
 def repeat(string: Expr, n: Expr) -> Expr:
-    """Repeats the ``string`` to ``n`` times."""
+    """Repeats the ``string`` to ``n`` times.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["ha"]})
+    >>> result = df.select(dfn.functions.repeat(dfn.col("a"), 
dfn.lit(3)).alias("r"))
+    >>> result.collect_column("r")[0].as_py()
+    'hahaha'
+    """
     return Expr(f.repeat(string.expr, n.expr))
 
 
 def replace(string: Expr, from_val: Expr, to_val: Expr) -> Expr:
-    """Replaces all occurrences of ``from_val`` with ``to_val`` in the 
``string``."""
+    """Replaces all occurrences of ``from_val`` with ``to_val`` in the 
``string``.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["hello world"]})
+    >>> result = df.select(
+    ...     dfn.functions.replace(dfn.col("a"), dfn.lit("world"),
+    ...     dfn.lit("there")).alias("r"))
+    >>> result.collect_column("r")[0].as_py()
+    'hello there'
+    """
     return Expr(f.replace(string.expr, from_val.expr, to_val.expr))
 
 
 def reverse(arg: Expr) -> Expr:
-    """Reverse the string argument."""
+    """Reverse the string argument.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["hello"]})
+    >>> result = df.select(dfn.functions.reverse(dfn.col("a")).alias("r"))
+    >>> result.collect_column("r")[0].as_py()
+    'olleh'
+    """
     return Expr(f.reverse(arg.expr))
 
 
 def right(string: Expr, n: Expr) -> Expr:
-    """Returns the last ``n`` characters in the ``string``."""
+    """Returns the last ``n`` characters in the ``string``.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["hello"]})
+    >>> result = df.select(dfn.functions.right(dfn.col("a"), 
dfn.lit(3)).alias("r"))
+    >>> result.collect_column("r")[0].as_py()
+    'llo'
+    """
     return Expr(f.right(string.expr, n.expr))
 
 
@@ -1361,13 +1556,31 @@ def rpad(string: Expr, count: Expr, characters: Expr | 
None = None) -> Expr:
 
     Extends the string to length length by appending the characters fill (a 
space
     by default). If the string is already longer than length then it is 
truncated.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["hi"]})
+    >>> result = df.select(
+    ...     dfn.functions.rpad(dfn.col("a"), dfn.lit(5), 
dfn.lit("!")).alias("r"))
+    >>> result.collect_column("r")[0].as_py()
+    'hi!!!'
     """
     characters = characters if characters is not None else Expr.literal(" ")
     return Expr(f.rpad(string.expr, count.expr, characters.expr))
 
 
 def rtrim(arg: Expr) -> Expr:
-    """Removes all characters, spaces by default, from the end of a string."""
+    """Removes all characters, spaces by default, from the end of a string.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": [" a  "]})
+    >>> trim_df = df.select(dfn.functions.rtrim(dfn.col("a")).alias("trimmed"))
+    >>> trim_df.collect_column("trimmed")[0].as_py()
+    ' a'
+    """
     return Expr(f.rtrim(arg.expr))
 
 
@@ -1475,6 +1688,15 @@ def split_part(string: Expr, delimiter: Expr, index: 
Expr) -> Expr:
 
     Splits a string based on a delimiter and picks out the desired field based
     on the index.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["a,b,c"]})
+    >>> result = df.select(
+    ...     dfn.functions.split_part(dfn.col("a"), dfn.lit(","), 
dfn.lit(2)).alias("s"))
+    >>> result.collect_column("s")[0].as_py()
+    'b'
     """
     return Expr(f.split_part(string.expr, delimiter.expr, index.expr))
 
@@ -1493,17 +1715,46 @@ def sqrt(arg: Expr) -> Expr:
 
 
 def starts_with(string: Expr, prefix: Expr) -> Expr:
-    """Returns true if string starts with prefix."""
+    """Returns true if string starts with prefix.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["hello_from_datafusion"]})
+    >>> result = df.select(
+    ...     dfn.functions.starts_with(dfn.col("a"), 
dfn.lit("hello")).alias("sw"))
+    >>> result.collect_column("sw")[0].as_py()
+    True
+    """
     return Expr(f.starts_with(string.expr, prefix.expr))
 
 
 def strpos(string: Expr, substring: Expr) -> Expr:
-    """Finds the position from where the ``substring`` matches the 
``string``."""
+    """Finds the position from where the ``substring`` matches the ``string``.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["hello"]})
+    >>> result = df.select(
+    ...     dfn.functions.strpos(dfn.col("a"), dfn.lit("llo")).alias("pos"))
+    >>> result.collect_column("pos")[0].as_py()
+    3
+    """
     return Expr(f.strpos(string.expr, substring.expr))
 
 
 def substr(string: Expr, position: Expr) -> Expr:
-    """Substring from the ``position`` to the end."""
+    """Substring from the ``position`` to the end.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["hello"]})
+    >>> result = df.select(dfn.functions.substr(dfn.col("a"), 
dfn.lit(3)).alias("s"))
+    >>> result.collect_column("s")[0].as_py()
+    'llo'
+    """
     return Expr(f.substr(string.expr, position.expr))
 
 
@@ -1512,12 +1763,32 @@ def substr_index(string: Expr, delimiter: Expr, count: 
Expr) -> Expr:
 
     The return will be the ``string`` from before ``count`` occurrences of
     ``delimiter``.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["a.b.c"]})
+    >>> result = df.select(
+    ...     dfn.functions.substr_index(dfn.col("a"), dfn.lit("."),
+    ...     dfn.lit(2)).alias("s"))
+    >>> result.collect_column("s")[0].as_py()
+    'a.b'
     """
     return Expr(f.substr_index(string.expr, delimiter.expr, count.expr))
 
 
 def substring(string: Expr, position: Expr, length: Expr) -> Expr:
-    """Substring from the ``position`` with ``length`` characters."""
+    """Substring from the ``position`` with ``length`` characters.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["hello world"]})
+    >>> result = df.select(
+    ...     dfn.functions.substring(dfn.col("a"), dfn.lit(1), 
dfn.lit(5)).alias("s"))
+    >>> result.collect_column("s")[0].as_py()
+    'hello'
+    """
     return Expr(f.substring(string.expr, position.expr, length.expr))
 
 
@@ -1548,7 +1819,16 @@ def tanh(arg: Expr) -> Expr:
 
 
 def to_hex(arg: Expr) -> Expr:
-    """Converts an integer to a hexadecimal string."""
+    """Converts an integer to a hexadecimal string.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": [255]})
+    >>> result = df.select(dfn.functions.to_hex(dfn.col("a")).alias("hex"))
+    >>> result.collect_column("hex")[0].as_py()
+    'ff'
+    """
     return Expr(f.to_hex(arg.expr))
 
 
@@ -1865,12 +2145,32 @@ def make_date(year: Expr, month: Expr, day: Expr) -> 
Expr:
 
 
 def translate(string: Expr, from_val: Expr, to_val: Expr) -> Expr:
-    """Replaces the characters in ``from_val`` with the counterpart in 
``to_val``."""
+    """Replaces the characters in ``from_val`` with the counterpart in 
``to_val``.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["hello"]})
+    >>> result = df.select(
+    ...     dfn.functions.translate(dfn.col("a"), dfn.lit("helo"),
+    ...     dfn.lit("HELO")).alias("t"))
+    >>> result.collect_column("t")[0].as_py()
+    'HELLO'
+    """
     return Expr(f.translate(string.expr, from_val.expr, to_val.expr))
 
 
 def trim(arg: Expr) -> Expr:
-    """Removes all characters, spaces by default, from both sides of a 
string."""
+    """Removes all characters, spaces by default, from both sides of a string.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["  hello  "]})
+    >>> result = df.select(dfn.functions.trim(dfn.col("a")).alias("t"))
+    >>> result.collect_column("t")[0].as_py()
+    'hello'
+    """
     return Expr(f.trim(arg.expr))
 
 
@@ -1890,7 +2190,16 @@ def trunc(num: Expr, precision: Expr | None = None) -> 
Expr:
 
 
 def upper(arg: Expr) -> Expr:
-    """Converts a string to uppercase."""
+    """Converts a string to uppercase.
+
+    Examples:
+    ---------
+    >>> ctx = dfn.SessionContext()
+    >>> df = ctx.from_pydict({"a": ["hello"]})
+    >>> result = df.select(dfn.functions.upper(dfn.col("a")).alias("u"))
+    >>> result.collect_column("u")[0].as_py()
+    'HELLO'
+    """
     return Expr(f.upper(arg.expr))
 
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion-python) branch main updated: Add docstring examples for Scalar string functions (#1423)

Reply via email to