(hamilton) 06/06: Fix spark failures on py3.14

skrawcz Mon, 16 Feb 2026 20:18:56 -0800

This is an automated email from the ASF dual-hosted git repository.

skrawcz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hamilton.git


commit f0b80121e38fc415581f497cdbd676132ac361ba
Author: Dev-iL <[email protected]>
AuthorDate: Sun Feb 15 10:36:27 2026 +0200

    Fix spark failures on py3.14
---
 hamilton/plugins/h_spark.py          | 15 +++++++++++----
 plugin_tests/h_spark/test_h_spark.py |  3 +--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/hamilton/plugins/h_spark.py b/hamilton/plugins/h_spark.py
index 576cd4c7..1033f137 100644
--- a/hamilton/plugins/h_spark.py
+++ b/hamilton/plugins/h_spark.py
@@ -321,12 +321,15 @@ def _inspect_kwargs(kwargs: dict[str, Any]) -> tuple[DataFrame, dict[str, Any]]:
 def _format_pandas_udf(func_name: str, ordered_params: list[str]) -> str:
     formatting_params = {
         "name": func_name,
-        "return_type": "pd.Series",
-        "params": ", ".join([f"{param}: pd.Series" for param in 
ordered_params]),
+        "params": ", ".join(ordered_params),
         "param_call": ", ".join([f"{param}={param}" for param in 
ordered_params]),
     }
+    # NOTE: we intentionally omit type annotations here. The return type is passed
+    # explicitly to pyspark's pandas_udf(), and parameter annotations are not needed.
+    # On Python 3.14+, annotations in dynamically compiled code create __annotate__
+    # functions (PEP 749) that break PySpark's UDF serialization.
     func_string = """
-def {name}({params}) -> {return_type}:
+def {name}({params}):
     return partial_fn({param_call})
 """.format(**formatting_params)
     return func_string
@@ -380,7 +383,11 @@ def _fabricate_spark_function(
     else:
         func_string = _format_udf(func_name, ordered_params)
     module_code = compile(func_string, "<string>", "exec")
-    func_code = [c for c in module_code.co_consts if isinstance(c, CodeType)][0]
+    # Filter by name to avoid picking up __annotate__ or other helper code objects
+    # that Python 3.14+ may generate alongside the function (PEP 749).
+    func_code = [
+        c for c in module_code.co_consts if isinstance(c, CodeType) and c.co_name == func_name
+    ][0]
     return FunctionType(func_code, {**globals(), **{"partial_fn": partial_fn}}, func_name)
 
 
diff --git a/plugin_tests/h_spark/test_h_spark.py b/plugin_tests/h_spark/test_h_spark.py
index 809e3f74..93a4f889 100644
--- a/plugin_tests/h_spark/test_h_spark.py
+++ b/plugin_tests/h_spark/test_h_spark.py
@@ -735,8 +735,7 @@ def test_with_columns_generate_nodes_specify_namespace():
 
 def test__format_pandas_udf():
     assert (
-        h_spark._format_pandas_udf("foo", ["a", "b"]).strip()
-        == "def foo(a: pd.Series, b: pd.Series) -> pd.Series:\n"
+        h_spark._format_pandas_udf("foo", ["a", "b"]).strip() == "def foo(a, b):\n"
         "    return partial_fn(a=a, b=b)"
     )

(hamilton) 06/06: Fix spark failures on py3.14

Reply via email to