This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 8aa644e  [SPARK-32092][ML][PYSPARK][3.0] Removed foldCol related code
8aa644e is described below

commit 8aa644e9a991cd7f965aec082adcc3a3d19d452f
Author: Louiszr <zxhs...@gmail.com>
AuthorDate: Sun Aug 23 21:10:52 2020 -0700

    [SPARK-32092][ML][PYSPARK][3.0] Removed foldCol related code
    
    ### What changes were proposed in this pull request?
    
    - Removed `foldCol`-related code introduced in #29445, which is causing issues in the base branch.
    - Fixed `CrossValidatorModel.copy()` so that it correctly calls `.copy()` on the individual models instead of on lists of models.
    
    ### Why are the changes needed?
    
    - `foldCol` was introduced in 3.1 and is not available in branch-3.0, so code referencing it causes tests to fail.
    - `CrossValidatorModel.copy()` is supposed to shallow-copy the models, not the lists of models.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    - Existing tests created in #29445 ran and passed.
    - Updated `test_copy` to make sure `copy()` is called on models instead of lists of models.
    
    Closes #29524 from Louiszr/remove-foldcol-3.0.
    
    Authored-by: Louiszr <zxhs...@gmail.com>
    Signed-off-by: Huaxin Gao <huax...@us.ibm.com>
---
 python/pyspark/ml/tests/test_tuning.py | 11 ++++-------
 python/pyspark/ml/tuning.py            |  7 ++++---
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py
index b250740..b1acaf6 100644
--- a/python/pyspark/ml/tests/test_tuning.py
+++ b/python/pyspark/ml/tests/test_tuning.py
@@ -101,7 +101,6 @@ class CrossValidatorTests(SparkSessionTestCase):
             lambda x: x.getEstimator().uid,
             # SPARK-32092: CrossValidator.copy() needs to copy all existing params
             lambda x: x.getNumFolds(),
-            lambda x: x.getFoldCol(),
             lambda x: x.getCollectSubModels(),
             lambda x: x.getParallelism(),
             lambda x: x.getSeed()
@@ -116,7 +115,6 @@ class CrossValidatorTests(SparkSessionTestCase):
         # SPARK-32092: CrossValidatorModel.copy() needs to copy all existing params
         for param in [
             lambda x: x.getNumFolds(),
-            lambda x: x.getFoldCol(),
             lambda x: x.getSeed()
         ]:
             self.assertEqual(param(cvModel), param(cvModelCopied))
@@ -127,9 +125,9 @@ class CrossValidatorTests(SparkSessionTestCase):
             'foo',
             "Changing the original avgMetrics should not affect the copied model"
         )
-        cvModel.subModels[0] = 'foo'
+        cvModel.subModels[0][0].getInducedError = lambda: 'foo'
         self.assertNotEqual(
-            cvModelCopied.subModels[0],
+            cvModelCopied.subModels[0][0].getInducedError(),
             'foo',
             "Changing the original subModels should not affect the copied model"
         )
@@ -224,7 +222,6 @@ class CrossValidatorTests(SparkSessionTestCase):
         loadedCvModel = CrossValidatorModel.load(cvModelPath)
         for param in [
             lambda x: x.getNumFolds(),
-            lambda x: x.getFoldCol(),
             lambda x: x.getSeed(),
             lambda x: len(x.subModels)
         ]:
@@ -780,9 +777,9 @@ class TrainValidationSplitTests(SparkSessionTestCase):
             'foo',
             "Changing the original validationMetrics should not affect the copied model"
         )
-        tvsModel.subModels[0] = 'foo'
+        tvsModel.subModels[0].getInducedError = lambda: 'foo'
         self.assertNotEqual(
-            tvsModelCopied.subModels[0],
+            tvsModelCopied.subModels[0].getInducedError(),
             'foo',
             "Changing the original subModels should not affect the copied model"
         )
diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py
index 91f34ef..6283c8b 100644
--- a/python/pyspark/ml/tuning.py
+++ b/python/pyspark/ml/tuning.py
@@ -480,7 +480,10 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable):
             extra = dict()
         bestModel = self.bestModel.copy(extra)
         avgMetrics = list(self.avgMetrics)
-        subModels = [model.copy() for model in self.subModels]
+        subModels = [
+            [sub_model.copy() for sub_model in fold_sub_models]
+            for fold_sub_models in self.subModels
+        ]
         return self._copyValues(CrossValidatorModel(bestModel, avgMetrics, subModels), extra=extra)
 
     @since("2.3.0")
@@ -511,7 +514,6 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable):
             "estimator": estimator,
             "estimatorParamMaps": epms,
             "numFolds": java_stage.getNumFolds(),
-            "foldCol": java_stage.getFoldCol(),
             "seed": java_stage.getSeed(),
         }
         for param_name, param_val in params.items():
@@ -544,7 +546,6 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable):
             "estimator": estimator,
             "estimatorParamMaps": epms,
             "numFolds": self.getNumFolds(),
-            "foldCol": self.getFoldCol(),
             "seed": self.getSeed(),
         }
         for param_name, param_val in params.items():


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to