This is an automated email from the ASF dual-hosted git repository.

skrawcz pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/hamilton.git
commit f0b80121e38fc415581f497cdbd676132ac361ba Author: Dev-iL <[email protected]> AuthorDate: Sun Feb 15 10:36:27 2026 +0200 Fix spark failures on py3.14 --- hamilton/plugins/h_spark.py | 15 +++++++++++---- plugin_tests/h_spark/test_h_spark.py | 3 +-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/hamilton/plugins/h_spark.py b/hamilton/plugins/h_spark.py index 576cd4c7..1033f137 100644 --- a/hamilton/plugins/h_spark.py +++ b/hamilton/plugins/h_spark.py @@ -321,12 +321,15 @@ def _inspect_kwargs(kwargs: dict[str, Any]) -> tuple[DataFrame, dict[str, Any]]: def _format_pandas_udf(func_name: str, ordered_params: list[str]) -> str: formatting_params = { "name": func_name, - "return_type": "pd.Series", - "params": ", ".join([f"{param}: pd.Series" for param in ordered_params]), + "params": ", ".join(ordered_params), "param_call": ", ".join([f"{param}={param}" for param in ordered_params]), } + # NOTE: we intentionally omit type annotations here. The return type is passed + # explicitly to pyspark's pandas_udf(), and parameter annotations are not needed. + # On Python 3.14+, annotations in dynamically compiled code create __annotate__ + # functions (PEP 749) that break PySpark's UDF serialization. func_string = """ -def {name}({params}) -> {return_type}: +def {name}({params}): return partial_fn({param_call}) """.format(**formatting_params) return func_string @@ -380,7 +383,11 @@ def _fabricate_spark_function( else: func_string = _format_udf(func_name, ordered_params) module_code = compile(func_string, "<string>", "exec") - func_code = [c for c in module_code.co_consts if isinstance(c, CodeType)][0] + # Filter by name to avoid picking up __annotate__ or other helper code objects + # that Python 3.14+ may generate alongside the function (PEP 749). 
+ func_code = [ + c for c in module_code.co_consts if isinstance(c, CodeType) and c.co_name == func_name + ][0] return FunctionType(func_code, {**globals(), **{"partial_fn": partial_fn}}, func_name) diff --git a/plugin_tests/h_spark/test_h_spark.py b/plugin_tests/h_spark/test_h_spark.py index 809e3f74..93a4f889 100644 --- a/plugin_tests/h_spark/test_h_spark.py +++ b/plugin_tests/h_spark/test_h_spark.py @@ -735,8 +735,7 @@ def test_with_columns_generate_nodes_specify_namespace(): def test__format_pandas_udf(): assert ( - h_spark._format_pandas_udf("foo", ["a", "b"]).strip() - == "def foo(a: pd.Series, b: pd.Series) -> pd.Series:\n" + h_spark._format_pandas_udf("foo", ["a", "b"]).strip() == "def foo(a, b):\n" " return partial_fn(a=a, b=b)" )
