HyukjinKwon commented on code in PR #53188:
URL: https://github.com/apache/spark/pull/53188#discussion_r2554465033
##########
python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py:
##########
@@ -867,10 +872,11 @@ def _test_apply_in_pandas(self, f, output_schema="id
long, mean double"):
def _test_apply_in_pandas_returning_empty_dataframe(self, empty_df):
"""Tests some returned DataFrames are empty."""
df = self.data
+ f = copy.deepcopy(ApplyInPandasTestsMixin.stats_with_no_column_names)
Review Comment:
```
======================================================================
ERROR [0.770s]: test_apply_in_pandas_returning_column_names
(pyspark.sql.tests.connect.pandas.test_parity_pandas_grouped_map.GroupedApplyInPandasTests.test_apply_in_pandas_returning_column_names)
----------------------------------------------------------------------
Traceback (most recent call last):
File
"/home/runner/work/spark/spark-4.0/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py",
line 303, in test_apply_in_pandas_returning_column_names
self._test_apply_in_pandas(GroupedApplyInPandasTestsMixin.stats_with_column_names)
File
"/home/runner/work/spark/spark-4.0/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py",
line 839, in _test_apply_in_pandas
df.groupby("id").applyInPandas(f, schema=output_schema).sort("id",
"mean").toPandas()
File
"/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/dataframe.py",
line 1807, in toPandas
pdf, ei = self._session.client.to_pandas(query, self._plan.observations)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
line 948, in to_pandas
table, schema, metrics, observed_metrics, _ = self._execute_and_fetch(
^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
line 1560, in _execute_and_fetch
for response in self._execute_and_fetch_as_iterator(
File
"/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
line 1537, in _execute_and_fetch_as_iterator
self._handle_error(error)
File
"/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
line 1811, in _handle_error
self._handle_rpc_error(error)
File
"/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
line 1882, in _handle_rpc_error
raise convert_exception(
pyspark.errors.exceptions.connect.PythonException:
An exception was thrown from the Python worker. Please see the stack trace
below.
Traceback (most recent call last):
File
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line
3375, in main
func, profiler, deserializer, serializer = read_udfs(pickleSer, infile,
eval_type)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line
2982, in read_udfs
arg_offsets, f = read_single_udf(
^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line
1306, in read_single_udf
f, return_type = read_command(pickleSer, infile)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker_util.py",
line 64, in read_command
command = serializer._read_with_length(file)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/serializers.py",
line 173, in _read_with_length
return self.loads(obj)
^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/serializers.py",
line 461, in loads
return cloudpickle.loads(obj, encoding=encoding)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: Can't get attribute
'GroupedApplyInPandasTestsMixin.stats_with_column_names' on <module
'pyspark.sql.tests.pandas.test_pandas_grouped_map' from
'/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/sql/tests/pandas/test_pandas_grouped_map.py'>
======================================================================
ERROR [0.766s]: test_apply_in_pandas_returning_column_names_sometimes
(pyspark.sql.tests.connect.pandas.test_parity_pandas_grouped_map.GroupedApplyInPandasTests.test_apply_in_pandas_returning_column_names_sometimes)
----------------------------------------------------------------------
Traceback (most recent call last):
File
"/home/runner/work/spark/spark-4.0/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py",
line 315, in test_apply_in_pandas_returning_column_names_sometimes
pdf, ei = self._session.client.to_pandas(query, self._plan.observations)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
line 948, in to_pandas
table, schema, metrics, observed_metrics, _ = self._execute_and_fetch(
^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
line 1560, in _execute_and_fetch
for response in self._execute_and_fetch_as_iterator(
File
"/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
line 1537, in _execute_and_fetch_as_iterator
self._handle_error(error)
File
"/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
line 1811, in _handle_error
self._handle_rpc_error(error)
File
"/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
line 1882, in _handle_rpc_error
raise convert_exception(
pyspark.errors.exceptions.connect.PythonException:
An exception was thrown from the Python worker. Please see the stack trace
below.
Traceback (most recent call last):
File
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line
3375, in main
func, profiler, deserializer, serializer = read_udfs(pickleSer, infile,
eval_type)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line
2982, in read_udfs
arg_offsets, f = read_single_udf(
^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line
1306, in read_single_udf
f, return_type = read_command(pickleSer, infile)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker_util.py",
line 64, in read_command
command = serializer._read_with_length(file)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/serializers.py",
line 173, in _read_with_length
return self.loads(obj)
^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/serializers.py",
line 461, in loads
return cloudpickle.loads(obj, encoding=encoding)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: Can't get attribute
'GroupedApplyInPandasTestsMixin.stats_with_no_column_names' on <module
'pyspark.sql.tests.pandas.test_pandas_grouped_map' from
'/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/sql/tests/pandas/test_pandas_grouped_map.py'>
```
Related PR: https://github.com/apache/spark/pull/52303
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]