Copilot commented on code in PR #1470:
URL:
https://github.com/apache/datafusion-python/pull/1470#discussion_r3034074031
##########
python/tests/test_functions.py:
##########
@@ -1435,3 +1435,73 @@ def test_coalesce(df):
assert result.column(0) == pa.array(
["Hello", "fallback", "!"], type=pa.string_view()
)
+
+
+def test_get_field(df):
+ df = df.with_column(
+ "s",
+ f.named_struct(
+ [
+ ("x", column("a")),
+ ("y", column("b")),
+ ]
+ ),
+ )
+ result = df.select(
+ f.get_field(column("s"), string_literal("x")).alias("x_val"),
+ f.get_field(column("s"), string_literal("y")).alias("y_val"),
+ ).collect()[0]
+
+ assert result.column(0) == pa.array(["Hello", "World", "!"],
type=pa.string_view())
+ assert result.column(1) == pa.array([4, 5, 6])
+
+
+def test_arrow_metadata(df):
+ result = df.select(
+ f.arrow_metadata(column("a")).alias("meta"),
+ ).collect()[0]
+ # The metadata column should be returned as a map type (possibly empty)
+ assert result.column(0).type == pa.map_(pa.utf8(), pa.utf8())
Review Comment:
`test_arrow_metadata` currently only asserts the returned Arrow type is
`map<utf8, utf8>` and doesn’t verify contents or the 2-argument form (lookup by
key). Consider extending this test to first create a column with known field
metadata (e.g., via `alias(..., metadata={...})`) and then assert (1) the map
contains the expected key/value and (2) `arrow_metadata(expr, key)` returns the
expected value.
```suggestion
metadata_key = "source"
metadata_value = "unit-test"
df = df.select(
column("a").alias("a_with_meta", metadata={metadata_key:
metadata_value})
)
result = df.select(
f.arrow_metadata(column("a_with_meta")).alias("meta"),
f.arrow_metadata(column("a_with_meta"),
string_literal(metadata_key)).alias(
"meta_value"
),
).collect()[0]
# The metadata column should be returned as a map type and contain the
# expected key/value for each row.
assert result.column(0).type == pa.map_(pa.utf8(), pa.utf8())
assert result.column(0).to_pylist() == [[(metadata_key,
metadata_value)]] * len(
result.column(0)
)
assert result.column(1).to_pylist() == [metadata_value] *
len(result.column(1))
```
##########
python/datafusion/functions.py:
##########
@@ -90,6 +90,7 @@
"array_to_string",
"array_union",
"arrow_cast",
+ "arrow_metadata",
"arrow_typeof",
"ascii",
"asin",
Review Comment:
The PR description/linked issue (#1453) includes exposing `arrow_try_cast`,
but this PR adds `arrow_metadata`, `union_tag`, `get_field`, `union_extract`,
and `version` without it. Please either implement/export `arrow_try_cast`
(Rust binding + Python wrapper + tests) or update the PR scope/issue closure
accordingly so we don’t close the issue with functionality still missing.
##########
crates/core/src/functions.rs:
##########
@@ -631,8 +631,29 @@ expr_fn_vec!(named_struct);
expr_fn!(from_unixtime, unixtime);
expr_fn!(arrow_typeof, arg_1);
expr_fn!(arrow_cast, arg_1 datatype);
+expr_fn_vec!(arrow_metadata);
+expr_fn!(union_tag, arg1);
expr_fn!(random);
Review Comment:
`arrow_try_cast` is part of the missing-function list for issue #1453, but
there is no Rust binding/export added here (only `arrow_metadata`, `union_tag`,
`get_field`, `union_extract`, `version`). Please add the binding for
`arrow_try_cast` (and expose it in the module) or adjust the PR/issue closure
to match what’s actually implemented.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]