ntjohnson1 commented on code in PR #1470:
URL:
https://github.com/apache/datafusion-python/pull/1470#discussion_r3039132251
##########
python/datafusion/functions.py:
##########
@@ -2596,22 +2602,111 @@ def arrow_typeof(arg: Expr) -> Expr:
return Expr(f.arrow_typeof(arg.expr))
-def arrow_cast(expr: Expr, data_type: Expr) -> Expr:
+def arrow_cast(expr: Expr, data_type: Expr | str) -> Expr:
"""Casts an expression to a specified data type.
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1]})
- >>> data_type = dfn.string_literal("Float64")
>>> result = df.select(
- ... dfn.functions.arrow_cast(dfn.col("a"), data_type).alias("c")
+ ... dfn.functions.arrow_cast(dfn.col("a"), "Float64").alias("c")
... )
>>> result.collect_column("c")[0].as_py()
1.0
"""
+ if isinstance(data_type, str):
+ data_type = Expr.string_literal(data_type)
return Expr(f.arrow_cast(expr.expr, data_type.expr))
+def arrow_metadata(expr: Expr, key: Expr | str | None = None) -> Expr:
+ """Returns the metadata of the input expression.
+
+ If called with one argument, returns a Map of all metadata key-value pairs.
+ If called with two arguments, returns the value for the specified metadata
key.
+
+ Args:
+ expr: An expression whose metadata to retrieve.
+ key: Optional metadata key to look up. Can be a string or an Expr.
+
+ Returns:
+ A Map of metadata or a specific metadata value.
+ """
+ if key is None:
+ return Expr(f.arrow_metadata(expr.expr))
+ if isinstance(key, str):
+ key = Expr.string_literal(key)
+ return Expr(f.arrow_metadata(expr.expr, key.expr))
+
+
+def get_field(expr: Expr, name: Expr | str) -> Expr:
+ """Extracts a field from a struct or map by name.
+
+ Args:
+ expr: A struct or map expression.
+ name: The field name to extract.
+
+ Returns:
Review Comment:
Echo example/returns note above
##########
python/datafusion/functions.py:
##########
@@ -2596,22 +2602,111 @@ def arrow_typeof(arg: Expr) -> Expr:
return Expr(f.arrow_typeof(arg.expr))
-def arrow_cast(expr: Expr, data_type: Expr) -> Expr:
+def arrow_cast(expr: Expr, data_type: Expr | str) -> Expr:
"""Casts an expression to a specified data type.
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1]})
- >>> data_type = dfn.string_literal("Float64")
>>> result = df.select(
- ... dfn.functions.arrow_cast(dfn.col("a"), data_type).alias("c")
+ ... dfn.functions.arrow_cast(dfn.col("a"), "Float64").alias("c")
... )
>>> result.collect_column("c")[0].as_py()
1.0
"""
+ if isinstance(data_type, str):
+ data_type = Expr.string_literal(data_type)
return Expr(f.arrow_cast(expr.expr, data_type.expr))
+def arrow_metadata(expr: Expr, key: Expr | str | None = None) -> Expr:
+ """Returns the metadata of the input expression.
+
+ If called with one argument, returns a Map of all metadata key-value pairs.
+ If called with two arguments, returns the value for the specified metadata
key.
+
+ Args:
+ expr: An expression whose metadata to retrieve.
+ key: Optional metadata key to look up. Can be a string or an Expr.
+
+ Returns:
Review Comment:
No example, and I think nothing else in the file uses a Returns section.
I'm not sure how consistent that is across the code base.
##########
python/datafusion/functions.py:
##########
@@ -2596,22 +2602,111 @@ def arrow_typeof(arg: Expr) -> Expr:
return Expr(f.arrow_typeof(arg.expr))
-def arrow_cast(expr: Expr, data_type: Expr) -> Expr:
+def arrow_cast(expr: Expr, data_type: Expr | str) -> Expr:
"""Casts an expression to a specified data type.
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1]})
- >>> data_type = dfn.string_literal("Float64")
>>> result = df.select(
- ... dfn.functions.arrow_cast(dfn.col("a"), data_type).alias("c")
+ ... dfn.functions.arrow_cast(dfn.col("a"), "Float64").alias("c")
... )
>>> result.collect_column("c")[0].as_py()
1.0
"""
+ if isinstance(data_type, str):
+ data_type = Expr.string_literal(data_type)
return Expr(f.arrow_cast(expr.expr, data_type.expr))
+def arrow_metadata(expr: Expr, key: Expr | str | None = None) -> Expr:
+ """Returns the metadata of the input expression.
+
+ If called with one argument, returns a Map of all metadata key-value pairs.
+ If called with two arguments, returns the value for the specified metadata
key.
+
+ Args:
+ expr: An expression whose metadata to retrieve.
+ key: Optional metadata key to look up. Can be a string or an Expr.
+
+ Returns:
+ A Map of metadata or a specific metadata value.
+ """
+ if key is None:
+ return Expr(f.arrow_metadata(expr.expr))
+ if isinstance(key, str):
+ key = Expr.string_literal(key)
+ return Expr(f.arrow_metadata(expr.expr, key.expr))
+
+
+def get_field(expr: Expr, name: Expr | str) -> Expr:
+ """Extracts a field from a struct or map by name.
+
+ Args:
+ expr: A struct or map expression.
+ name: The field name to extract.
+
+ Returns:
+ The value of the named field.
+ """
+ if isinstance(name, str):
+ name = Expr.string_literal(name)
+ return Expr(f.get_field(expr.expr, name.expr))
+
+
+def union_extract(union_expr: Expr, field_name: Expr | str) -> Expr:
+ """Extracts a value from a union type by field name.
+
+ Returns the value of the named field if it is the currently selected
+ variant, otherwise returns NULL.
+
+ Args:
+ union_expr: A union-typed expression.
+ field_name: The name of the field to extract.
+
+ Returns:
+ The extracted value or NULL.
+ """
+ if isinstance(field_name, str):
+ field_name = Expr.string_literal(field_name)
+ return Expr(f.union_extract(union_expr.expr, field_name.expr))
+
+
+def union_tag(union_expr: Expr) -> Expr:
+ """Returns the tag (active field name) of a union type.
+
+ Args:
+ union_expr: A union-typed expression.
+
+ Returns:
+ The name of the currently selected field in the union.
+ """
+ return Expr(f.union_tag(union_expr.expr))
+
+
+def version() -> Expr:
+ """Returns the DataFusion version string.
+
+ Returns:
+ A string describing the DataFusion version.
+ """
+ return Expr(f.version())
+
+
+def row(*args: Expr) -> Expr:
+ """Returns a struct with the given arguments.
+
+ This is an alias for :py:func:`struct`.
Review Comment:
Doesn't use the See Also block
##########
python/datafusion/functions.py:
##########
@@ -2596,22 +2602,111 @@ def arrow_typeof(arg: Expr) -> Expr:
return Expr(f.arrow_typeof(arg.expr))
-def arrow_cast(expr: Expr, data_type: Expr) -> Expr:
+def arrow_cast(expr: Expr, data_type: Expr | str) -> Expr:
"""Casts an expression to a specified data type.
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1]})
- >>> data_type = dfn.string_literal("Float64")
>>> result = df.select(
- ... dfn.functions.arrow_cast(dfn.col("a"), data_type).alias("c")
+ ... dfn.functions.arrow_cast(dfn.col("a"), "Float64").alias("c")
... )
>>> result.collect_column("c")[0].as_py()
1.0
"""
+ if isinstance(data_type, str):
Review Comment:
Nice! I don't know if anyone has run into it yet, but I wonder if a helper
around strings might be nice. Hopefully the most common case is people just
passing Python strings, but I could imagine someone passing a numpy string or
pyarrow string extracted from some other operation. Definitely follow-on work/issue.
##########
python/datafusion/functions.py:
##########
@@ -2596,22 +2602,111 @@ def arrow_typeof(arg: Expr) -> Expr:
return Expr(f.arrow_typeof(arg.expr))
-def arrow_cast(expr: Expr, data_type: Expr) -> Expr:
+def arrow_cast(expr: Expr, data_type: Expr | str) -> Expr:
"""Casts an expression to a specified data type.
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1]})
- >>> data_type = dfn.string_literal("Float64")
>>> result = df.select(
- ... dfn.functions.arrow_cast(dfn.col("a"), data_type).alias("c")
+ ... dfn.functions.arrow_cast(dfn.col("a"), "Float64").alias("c")
... )
>>> result.collect_column("c")[0].as_py()
1.0
"""
+ if isinstance(data_type, str):
+ data_type = Expr.string_literal(data_type)
return Expr(f.arrow_cast(expr.expr, data_type.expr))
+def arrow_metadata(expr: Expr, key: Expr | str | None = None) -> Expr:
+ """Returns the metadata of the input expression.
+
+ If called with one argument, returns a Map of all metadata key-value pairs.
+ If called with two arguments, returns the value for the specified metadata
key.
+
+ Args:
+ expr: An expression whose metadata to retrieve.
+ key: Optional metadata key to look up. Can be a string or an Expr.
+
+ Returns:
+ A Map of metadata or a specific metadata value.
+ """
+ if key is None:
+ return Expr(f.arrow_metadata(expr.expr))
+ if isinstance(key, str):
+ key = Expr.string_literal(key)
+ return Expr(f.arrow_metadata(expr.expr, key.expr))
+
+
+def get_field(expr: Expr, name: Expr | str) -> Expr:
+ """Extracts a field from a struct or map by name.
+
+ Args:
+ expr: A struct or map expression.
+ name: The field name to extract.
+
+ Returns:
+ The value of the named field.
+ """
+ if isinstance(name, str):
+ name = Expr.string_literal(name)
+ return Expr(f.get_field(expr.expr, name.expr))
+
+
+def union_extract(union_expr: Expr, field_name: Expr | str) -> Expr:
+ """Extracts a value from a union type by field name.
+
+ Returns the value of the named field if it is the currently selected
+ variant, otherwise returns NULL.
+
+ Args:
+ union_expr: A union-typed expression.
+ field_name: The name of the field to extract.
+
+ Returns:
+ The extracted value or NULL.
+ """
+ if isinstance(field_name, str):
+ field_name = Expr.string_literal(field_name)
+ return Expr(f.union_extract(union_expr.expr, field_name.expr))
+
+
+def union_tag(union_expr: Expr) -> Expr:
+ """Returns the tag (active field name) of a union type.
+
+ Args:
+ union_expr: A union-typed expression.
+
+ Returns:
+ The name of the currently selected field in the union.
+ """
+ return Expr(f.union_tag(union_expr.expr))
+
+
+def version() -> Expr:
+ """Returns the DataFusion version string.
+
+ Returns:
+ A string describing the DataFusion version.
+ """
+ return Expr(f.version())
+
+
+def row(*args: Expr) -> Expr:
+ """Returns a struct with the given arguments.
+
+ This is an alias for :py:func:`struct`.
+
+ Args:
+ args: The expressions to include in the struct.
+
+ Returns:
Review Comment:
example/returns
##########
python/datafusion/functions.py:
##########
@@ -2596,22 +2602,111 @@ def arrow_typeof(arg: Expr) -> Expr:
return Expr(f.arrow_typeof(arg.expr))
-def arrow_cast(expr: Expr, data_type: Expr) -> Expr:
+def arrow_cast(expr: Expr, data_type: Expr | str) -> Expr:
"""Casts an expression to a specified data type.
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1]})
- >>> data_type = dfn.string_literal("Float64")
>>> result = df.select(
- ... dfn.functions.arrow_cast(dfn.col("a"), data_type).alias("c")
+ ... dfn.functions.arrow_cast(dfn.col("a"), "Float64").alias("c")
... )
>>> result.collect_column("c")[0].as_py()
1.0
"""
+ if isinstance(data_type, str):
+ data_type = Expr.string_literal(data_type)
return Expr(f.arrow_cast(expr.expr, data_type.expr))
+def arrow_metadata(expr: Expr, key: Expr | str | None = None) -> Expr:
+ """Returns the metadata of the input expression.
+
+ If called with one argument, returns a Map of all metadata key-value pairs.
+ If called with two arguments, returns the value for the specified metadata
key.
+
+ Args:
+ expr: An expression whose metadata to retrieve.
+ key: Optional metadata key to look up. Can be a string or an Expr.
+
+ Returns:
+ A Map of metadata or a specific metadata value.
+ """
+ if key is None:
+ return Expr(f.arrow_metadata(expr.expr))
+ if isinstance(key, str):
+ key = Expr.string_literal(key)
+ return Expr(f.arrow_metadata(expr.expr, key.expr))
+
+
+def get_field(expr: Expr, name: Expr | str) -> Expr:
+ """Extracts a field from a struct or map by name.
+
+ Args:
+ expr: A struct or map expression.
+ name: The field name to extract.
+
+ Returns:
+ The value of the named field.
+ """
+ if isinstance(name, str):
+ name = Expr.string_literal(name)
+ return Expr(f.get_field(expr.expr, name.expr))
+
+
+def union_extract(union_expr: Expr, field_name: Expr | str) -> Expr:
+ """Extracts a value from a union type by field name.
+
+ Returns the value of the named field if it is the currently selected
+ variant, otherwise returns NULL.
+
+ Args:
+ union_expr: A union-typed expression.
+ field_name: The name of the field to extract.
+
+ Returns:
Review Comment:
example/returns
##########
python/datafusion/functions.py:
##########
@@ -2596,22 +2602,111 @@ def arrow_typeof(arg: Expr) -> Expr:
return Expr(f.arrow_typeof(arg.expr))
-def arrow_cast(expr: Expr, data_type: Expr) -> Expr:
+def arrow_cast(expr: Expr, data_type: Expr | str) -> Expr:
"""Casts an expression to a specified data type.
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1]})
- >>> data_type = dfn.string_literal("Float64")
>>> result = df.select(
- ... dfn.functions.arrow_cast(dfn.col("a"), data_type).alias("c")
+ ... dfn.functions.arrow_cast(dfn.col("a"), "Float64").alias("c")
... )
>>> result.collect_column("c")[0].as_py()
1.0
"""
+ if isinstance(data_type, str):
+ data_type = Expr.string_literal(data_type)
return Expr(f.arrow_cast(expr.expr, data_type.expr))
+def arrow_metadata(expr: Expr, key: Expr | str | None = None) -> Expr:
+ """Returns the metadata of the input expression.
+
+ If called with one argument, returns a Map of all metadata key-value pairs.
+ If called with two arguments, returns the value for the specified metadata
key.
+
+ Args:
+ expr: An expression whose metadata to retrieve.
+ key: Optional metadata key to look up. Can be a string or an Expr.
+
+ Returns:
+ A Map of metadata or a specific metadata value.
+ """
+ if key is None:
+ return Expr(f.arrow_metadata(expr.expr))
+ if isinstance(key, str):
+ key = Expr.string_literal(key)
+ return Expr(f.arrow_metadata(expr.expr, key.expr))
+
+
+def get_field(expr: Expr, name: Expr | str) -> Expr:
+ """Extracts a field from a struct or map by name.
+
+ Args:
+ expr: A struct or map expression.
+ name: The field name to extract.
+
+ Returns:
+ The value of the named field.
+ """
+ if isinstance(name, str):
+ name = Expr.string_literal(name)
+ return Expr(f.get_field(expr.expr, name.expr))
+
+
+def union_extract(union_expr: Expr, field_name: Expr | str) -> Expr:
+ """Extracts a value from a union type by field name.
+
+ Returns the value of the named field if it is the currently selected
+ variant, otherwise returns NULL.
+
+ Args:
+ union_expr: A union-typed expression.
+ field_name: The name of the field to extract.
+
+ Returns:
+ The extracted value or NULL.
+ """
+ if isinstance(field_name, str):
+ field_name = Expr.string_literal(field_name)
+ return Expr(f.union_extract(union_expr.expr, field_name.expr))
+
+
+def union_tag(union_expr: Expr) -> Expr:
+ """Returns the tag (active field name) of a union type.
+
+ Args:
+ union_expr: A union-typed expression.
+
+ Returns:
+ The name of the currently selected field in the union.
+ """
+ return Expr(f.union_tag(union_expr.expr))
+
+
+def version() -> Expr:
+ """Returns the DataFusion version string.
+
+ Returns:
Review Comment:
example/returns
In this case the Returns section is definitely redundant with the definition.
##########
python/datafusion/functions.py:
##########
@@ -2596,22 +2602,111 @@ def arrow_typeof(arg: Expr) -> Expr:
return Expr(f.arrow_typeof(arg.expr))
-def arrow_cast(expr: Expr, data_type: Expr) -> Expr:
+def arrow_cast(expr: Expr, data_type: Expr | str) -> Expr:
"""Casts an expression to a specified data type.
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1]})
- >>> data_type = dfn.string_literal("Float64")
>>> result = df.select(
- ... dfn.functions.arrow_cast(dfn.col("a"), data_type).alias("c")
+ ... dfn.functions.arrow_cast(dfn.col("a"), "Float64").alias("c")
... )
>>> result.collect_column("c")[0].as_py()
1.0
"""
+ if isinstance(data_type, str):
+ data_type = Expr.string_literal(data_type)
return Expr(f.arrow_cast(expr.expr, data_type.expr))
+def arrow_metadata(expr: Expr, key: Expr | str | None = None) -> Expr:
+ """Returns the metadata of the input expression.
+
+ If called with one argument, returns a Map of all metadata key-value pairs.
+ If called with two arguments, returns the value for the specified metadata
key.
+
+ Args:
+ expr: An expression whose metadata to retrieve.
+ key: Optional metadata key to look up. Can be a string or an Expr.
+
+ Returns:
+ A Map of metadata or a specific metadata value.
+ """
+ if key is None:
+ return Expr(f.arrow_metadata(expr.expr))
+ if isinstance(key, str):
+ key = Expr.string_literal(key)
+ return Expr(f.arrow_metadata(expr.expr, key.expr))
+
+
+def get_field(expr: Expr, name: Expr | str) -> Expr:
+ """Extracts a field from a struct or map by name.
+
+ Args:
+ expr: A struct or map expression.
+ name: The field name to extract.
+
+ Returns:
+ The value of the named field.
+ """
+ if isinstance(name, str):
+ name = Expr.string_literal(name)
+ return Expr(f.get_field(expr.expr, name.expr))
+
+
+def union_extract(union_expr: Expr, field_name: Expr | str) -> Expr:
+ """Extracts a value from a union type by field name.
+
+ Returns the value of the named field if it is the currently selected
+ variant, otherwise returns NULL.
+
+ Args:
+ union_expr: A union-typed expression.
+ field_name: The name of the field to extract.
+
+ Returns:
+ The extracted value or NULL.
+ """
+ if isinstance(field_name, str):
+ field_name = Expr.string_literal(field_name)
+ return Expr(f.union_extract(union_expr.expr, field_name.expr))
+
+
+def union_tag(union_expr: Expr) -> Expr:
+ """Returns the tag (active field name) of a union type.
+
+ Args:
+ union_expr: A union-typed expression.
+
+ Returns:
Review Comment:
example/returns
##########
python/tests/test_functions.py:
##########
@@ -20,7 +20,7 @@
import numpy as np
import pyarrow as pa
import pytest
-from datafusion import SessionContext, column, literal, string_literal
Review Comment:
Love that this is no longer needed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]