Hyukjin Kwon created SPARK-48084:
------------------------------------
Summary: pyspark.ml.connect.evaluation not working in 3.5 client <> 4.0 server
Key: SPARK-48084
URL: https://issues.apache.org/jira/browse/SPARK-48084
Project: Spark
Issue Type: Sub-task
Components: ML, PySpark
Affects Versions: 4.0.0
Reporter: Hyukjin Kwon
{code}
======================================================================
ERROR [3.966s]: test_regressor_evaluator
(pyspark.ml.tests.connect.test_connect_evaluation.EvaluationTestsOnConnect.test_regressor_evaluator)
----------------------------------------------------------------------
Traceback (most recent call last):
File
"/home/runner/work/spark/spark-3.5/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py",
line 69, in test_regressor_evaluator
rmse = rmse_evaluator.evaluate(df1)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/base.py",
line 255, in evaluate
return self._evaluate(dataset)
^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/evaluation.py",
line 70, in _evaluate
return aggregate_dataframe(
^^^^^^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/util.py",
line 93, in aggregate_dataframe
state = cloudpickle.loads(state)
^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: Can't get attribute '_class_setstate' on <module
'pyspark.cloudpickle.cloudpickle' from
'/home/runner/work/spark/spark-3.5/python/pyspark/cloudpickle/cloudpickle.py'>
----------------------------------------------------------------------
{code}
{code}
======================================================================
ERROR [4.664s]: test_copy
(pyspark.ml.tests.connect.test_connect_tuning.CrossValidatorTestsOnConnect.test_copy)
----------------------------------------------------------------------
Traceback (most recent call last):
File
"/home/runner/work/spark/spark-3.5/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py",
line 115, in test_copy
cvModel = cv.fit(dataset)
^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/base.py",
line 106, in fit
return self._fit(dataset)
^^^^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py",
line 437, in _fit
for j, metric in pool.imap_unordered(lambda f: f(), tasks):
File
"/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/multiprocessing/pool.py",
line 873, in next
raise value
File
"/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/multiprocessing/pool.py",
line 125, in worker
result = (True, func(*args, **kwds))
^^^^^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py",
line 437, in <lambda>
for j, metric in pool.imap_unordered(lambda f: f(), tasks):
^^^
File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py",
line 188, in single_task
metric = evaluator.evaluate(
^^^^^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/base.py",
line 255, in evaluate
return self._evaluate(dataset)
^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/evaluation.py",
line 70, in _evaluate
return aggregate_dataframe(
^^^^^^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/util.py",
line 93, in aggregate_dataframe
state = cloudpickle.loads(state)
^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: Can't get attribute '_class_setstate' on <module
'pyspark.cloudpickle.cloudpickle' from
'/home/runner/work/spark/spark-3.5/python/pyspark/cloudpickle/cloudpickle.py'>
{code}
{code}
======================================================================
ERROR [3.938s]: test_fit_minimize_metric
(pyspark.ml.tests.connect.test_connect_tuning.CrossValidatorTestsOnConnect.test_fit_minimize_metric)
----------------------------------------------------------------------
Traceback (most recent call last):
File
"/home/runner/work/spark/spark-3.5/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py",
line 149, in test_fit_minimize_metric
cvModel = cv.fit(dataset)
^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/base.py",
line 106, in fit
return self._fit(dataset)
^^^^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py",
line 437, in _fit
for j, metric in pool.imap_unordered(lambda f: f(), tasks):
File
"/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/multiprocessing/pool.py",
line 873, in next
raise value
File
"/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/multiprocessing/pool.py",
line 125, in worker
result = (True, func(*args, **kwds))
^^^^^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py",
line 437, in <lambda>
for j, metric in pool.imap_unordered(lambda f: f(), tasks):
^^^
File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py",
line 188, in single_task
metric = evaluator.evaluate(
^^^^^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/base.py",
line 255, in evaluate
return self._evaluate(dataset)
^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/evaluation.py",
line 70, in _evaluate
return aggregate_dataframe(
^^^^^^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/util.py",
line 93, in aggregate_dataframe
state = cloudpickle.loads(state)
^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: Can't get attribute '_class_setstate' on <module
'pyspark.cloudpickle.cloudpickle' from
'/home/runner/work/spark/spark-3.5/python/pyspark/cloudpickle/cloudpickle.py'>
{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]