[ https://issues.apache.org/jira/browse/SPARK-48084?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Dongjoon Hyun closed SPARK-48084.
---------------------------------
> pyspark.ml.connect.evaluation not working in 3.5 client <> 4.0 server
> ---------------------------------------------------------------------
>
> Key: SPARK-48084
> URL: https://issues.apache.org/jira/browse/SPARK-48084
> Project: Spark
> Issue Type: Sub-task
> Components: ML, PySpark
> Affects Versions: 4.0.0
> Reporter: Hyukjin Kwon
> Assignee: Weichen Xu
> Priority: Major
> Fix For: 3.5.2
>
>
> {code}
> ======================================================================
> ERROR [3.966s]: test_regressor_evaluator (pyspark.ml.tests.connect.test_connect_evaluation.EvaluationTestsOnConnect.test_regressor_evaluator)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py", line 69, in test_regressor_evaluator
>     rmse = rmse_evaluator.evaluate(df1)
>            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/base.py", line 255, in evaluate
>     return self._evaluate(dataset)
>            ^^^^^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/evaluation.py", line 70, in _evaluate
>     return aggregate_dataframe(
>            ^^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/util.py", line 93, in aggregate_dataframe
>     state = cloudpickle.loads(state)
>             ^^^^^^^^^^^^^^^^^^^^^^^^
> AttributeError: Can't get attribute '_class_setstate' on <module 'pyspark.cloudpickle.cloudpickle' from '/home/runner/work/spark/spark-3.5/python/pyspark/cloudpickle/cloudpickle.py'>
> ----------------------------------------------------------------------
> {code}
> {code}
> ======================================================================
> ERROR [4.664s]: test_copy (pyspark.ml.tests.connect.test_connect_tuning.CrossValidatorTestsOnConnect.test_copy)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py", line 115, in test_copy
>     cvModel = cv.fit(dataset)
>               ^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/base.py", line 106, in fit
>     return self._fit(dataset)
>            ^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py", line 437, in _fit
>     for j, metric in pool.imap_unordered(lambda f: f(), tasks):
>   File "/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/multiprocessing/pool.py", line 873, in next
>     raise value
>   File "/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/multiprocessing/pool.py", line 125, in worker
>     result = (True, func(*args, **kwds))
>                     ^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py", line 437, in <lambda>
>     for j, metric in pool.imap_unordered(lambda f: f(), tasks):
>                                                    ^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py", line 188, in single_task
>     metric = evaluator.evaluate(
>              ^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/base.py", line 255, in evaluate
>     return self._evaluate(dataset)
>            ^^^^^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/evaluation.py", line 70, in _evaluate
>     return aggregate_dataframe(
>            ^^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/util.py", line 93, in aggregate_dataframe
>     state = cloudpickle.loads(state)
>             ^^^^^^^^^^^^^^^^^^^^^^^^
> AttributeError: Can't get attribute '_class_setstate' on <module 'pyspark.cloudpickle.cloudpickle' from '/home/runner/work/spark/spark-3.5/python/pyspark/cloudpickle/cloudpickle.py'>
> {code}
> {code}
> ======================================================================
> ERROR [3.938s]: test_fit_minimize_metric (pyspark.ml.tests.connect.test_connect_tuning.CrossValidatorTestsOnConnect.test_fit_minimize_metric)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py", line 149, in test_fit_minimize_metric
>     cvModel = cv.fit(dataset)
>               ^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/base.py", line 106, in fit
>     return self._fit(dataset)
>            ^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py", line 437, in _fit
>     for j, metric in pool.imap_unordered(lambda f: f(), tasks):
>   File "/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/multiprocessing/pool.py", line 873, in next
>     raise value
>   File "/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/multiprocessing/pool.py", line 125, in worker
>     result = (True, func(*args, **kwds))
>                     ^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py", line 437, in <lambda>
>     for j, metric in pool.imap_unordered(lambda f: f(), tasks):
>                                                    ^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py", line 188, in single_task
>     metric = evaluator.evaluate(
>              ^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/base.py", line 255, in evaluate
>     return self._evaluate(dataset)
>            ^^^^^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/evaluation.py", line 70, in _evaluate
>     return aggregate_dataframe(
>            ^^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/util.py", line 93, in aggregate_dataframe
>     state = cloudpickle.loads(state)
>             ^^^^^^^^^^^^^^^^^^^^^^^^
> AttributeError: Can't get attribute '_class_setstate' on <module 'pyspark.cloudpickle.cloudpickle' from '/home/runner/work/spark/spark-3.5/python/pyspark/cloudpickle/cloudpickle.py'>
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]