Repository: spark
Updated Branches:
  refs/heads/branch-1.5 5592d162a -> fe05142f5


[SPARK-9073] [ML] spark.ml Models copy() should call setParent when there is a parent

Copied ML models must have the same parent as the original ones.
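
Every affected copy() override now chains .setParent(parent) after copyValues(), so the copy keeps a reference to the Estimator that produced the original model. A minimal sketch of the pattern, assuming the Spark 1.5 ml.Model API (MyModel is a hypothetical model used only for illustration, not part of this patch):

  import org.apache.spark.ml.Model
  import org.apache.spark.ml.param.ParamMap
  import org.apache.spark.sql.DataFrame
  import org.apache.spark.sql.types.StructType

  // Hypothetical model illustrating the copy()/setParent() pattern.
  class MyModel(override val uid: String) extends Model[MyModel] {

    // Trivial stubs so the sketch is self-contained.
    override def transform(dataset: DataFrame): DataFrame = dataset
    override def transformSchema(schema: StructType): StructType = schema

    override def copy(extra: ParamMap): MyModel = {
      // Copy param values into a fresh instance, then keep the parent
      // Estimator reference so copied.parent == original.parent holds.
      copyValues(new MyModel(uid), extra).setParent(parent)
    }
  }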

Author: lewuathe <[email protected]>
Author: Lewuathe <[email protected]>

Closes #7447 from Lewuathe/SPARK-9073.

(cherry picked from commit 2932e25da4532de9e86b01d08bce0cb680874e70)
Signed-off-by: Joseph K. Bradley <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fe05142f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fe05142f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fe05142f

Branch: refs/heads/branch-1.5
Commit: fe05142f5bc6b11ba9d5d2d77f989610178fc7b5
Parents: 5592d16
Author: lewuathe <[email protected]>
Authored: Thu Aug 13 09:17:19 2015 -0700
Committer: Joseph K. Bradley <[email protected]>
Committed: Thu Aug 13 09:17:31 2015 -0700

----------------------------------------------------------------------
 .../examples/ml/JavaDeveloperApiExample.java    |  3 +-
 .../spark/examples/ml/DeveloperApiExample.scala |  2 +-
 .../scala/org/apache/spark/ml/Pipeline.scala    |  2 +-
 .../classification/DecisionTreeClassifier.scala |  1 +
 .../spark/ml/classification/GBTClassifier.scala |  2 +-
 .../ml/classification/LogisticRegression.scala  |  2 +-
 .../spark/ml/classification/OneVsRest.scala     |  2 +-
 .../classification/RandomForestClassifier.scala |  1 +
 .../apache/spark/ml/feature/Bucketizer.scala    |  4 ++-
 .../scala/org/apache/spark/ml/feature/IDF.scala |  2 +-
 .../apache/spark/ml/feature/MinMaxScaler.scala  |  2 +-
 .../scala/org/apache/spark/ml/feature/PCA.scala |  2 +-
 .../spark/ml/feature/StandardScaler.scala       |  2 +-
 .../apache/spark/ml/feature/StringIndexer.scala |  2 +-
 .../apache/spark/ml/feature/VectorIndexer.scala |  2 +-
 .../org/apache/spark/ml/feature/Word2Vec.scala  |  2 +-
 .../apache/spark/ml/recommendation/ALS.scala    |  2 +-
 .../ml/regression/DecisionTreeRegressor.scala   |  2 +-
 .../spark/ml/regression/GBTRegressor.scala      |  2 +-
 .../spark/ml/regression/LinearRegression.scala  |  2 +-
 .../ml/regression/RandomForestRegressor.scala   |  2 +-
 .../apache/spark/ml/tuning/CrossValidator.scala |  2 +-
 .../org/apache/spark/ml/PipelineSuite.scala     |  3 ++
 .../DecisionTreeClassifierSuite.scala           |  4 +++
 .../ml/classification/GBTClassifierSuite.scala  |  4 +++
 .../LogisticRegressionSuite.scala               |  4 +++
 .../ml/classification/OneVsRestSuite.scala      |  6 +++-
 .../RandomForestClassifierSuite.scala           |  4 +++
 .../spark/ml/feature/BucketizerSuite.scala      |  1 +
 .../spark/ml/feature/MinMaxScalerSuite.scala    |  4 +++
 .../org/apache/spark/ml/feature/PCASuite.scala  |  4 +++
 .../spark/ml/feature/StringIndexerSuite.scala   |  5 ++++
 .../spark/ml/feature/VectorIndexerSuite.scala   |  5 ++++
 .../apache/spark/ml/feature/Word2VecSuite.scala |  4 +++
 .../spark/ml/recommendation/ALSSuite.scala      |  4 +++
 .../regression/DecisionTreeRegressorSuite.scala | 11 +++++++
 .../spark/ml/regression/GBTRegressorSuite.scala |  5 ++++
 .../ml/regression/LinearRegressionSuite.scala   |  5 ++++
 .../regression/RandomForestRegressorSuite.scala |  7 ++++-
 .../spark/ml/tuning/CrossValidatorSuite.scala   |  5 ++++
 .../apache/spark/ml/util/MLTestingUtils.scala   | 30 ++++++++++++++++++++
 41 files changed, 138 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
----------------------------------------------------------------------
diff --git 
a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
 
b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
index 9df26ff..3f1fe90 100644
--- 
a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
+++ 
b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
@@ -230,6 +230,7 @@ class MyJavaLogisticRegressionModel
    */
   @Override
   public MyJavaLogisticRegressionModel copy(ParamMap extra) {
-    return copyValues(new MyJavaLogisticRegressionModel(uid(), weights_), extra);
+    return copyValues(new MyJavaLogisticRegressionModel(uid(), weights_), extra)
+      .setParent(parent());
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
----------------------------------------------------------------------
diff --git 
a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
 
b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
index 78f31b4..340c355 100644
--- 
a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
+++ 
b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
@@ -179,7 +179,7 @@ private class MyLogisticRegressionModel(
    * This is used for the default implementation of [[transform()]].
    */
   override def copy(extra: ParamMap): MyLogisticRegressionModel = {
-    copyValues(new MyLogisticRegressionModel(uid, weights), extra)
+    copyValues(new MyLogisticRegressionModel(uid, weights), extra).setParent(parent)
   }
 }
 // scalastyle:on println

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala 
b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
index aef2c01..a3e5940 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
@@ -198,6 +198,6 @@ class PipelineModel private[ml] (
   }
 
   override def copy(extra: ParamMap): PipelineModel = {
-    new PipelineModel(uid, stages.map(_.copy(extra)))
+    new PipelineModel(uid, stages.map(_.copy(extra))).setParent(parent)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
 
b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
index 29598f3..6f70b96 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
@@ -141,6 +141,7 @@ final class DecisionTreeClassificationModel private[ml] (
 
   override def copy(extra: ParamMap): DecisionTreeClassificationModel = {
     copyValues(new DecisionTreeClassificationModel(uid, rootNode, numClasses), extra)
+      .setParent(parent)
   }
 
   override def toString: String = {

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala 
b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
index c3891a9..3073a2a 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
@@ -196,7 +196,7 @@ final class GBTClassificationModel(
   }
 
   override def copy(extra: ParamMap): GBTClassificationModel = {
-    copyValues(new GBTClassificationModel(uid, _trees, _treeWeights), extra)
+    copyValues(new GBTClassificationModel(uid, _trees, _treeWeights), extra).setParent(parent)
   }
 
   override def toString: String = {

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
 
b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 5bcd711..21fbe38 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -468,7 +468,7 @@ class LogisticRegressionModel private[ml] (
   }
 
   override def copy(extra: ParamMap): LogisticRegressionModel = {
-    copyValues(new LogisticRegressionModel(uid, weights, intercept), extra)
+    copyValues(new LogisticRegressionModel(uid, weights, intercept), extra).setParent(parent)
   }
 
   override protected def raw2prediction(rawPrediction: Vector): Double = {

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala 
b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
index 1741f19..1132d80 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
@@ -138,7 +138,7 @@ final class OneVsRestModel private[ml] (
   override def copy(extra: ParamMap): OneVsRestModel = {
     val copied = new OneVsRestModel(
       uid, labelMetadata, models.map(_.copy(extra).asInstanceOf[ClassificationModel[_, _]]))
-    copyValues(copied, extra)
+    copyValues(copied, extra).setParent(parent)
   }
 }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
 
b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
index 156050a..11a6d72 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
@@ -189,6 +189,7 @@ final class RandomForestClassificationModel private[ml] (
 
   override def copy(extra: ParamMap): RandomForestClassificationModel = {
     copyValues(new RandomForestClassificationModel(uid, _trees, numFeatures, numClasses), extra)
+      .setParent(parent)
   }
 
   override def toString: String = {

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
index 67e4785..cfca494 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
@@ -90,7 +90,9 @@ final class Bucketizer(override val uid: String)
     SchemaUtils.appendColumn(schema, prepOutputField(schema))
   }
 
-  override def copy(extra: ParamMap): Bucketizer = defaultCopy(extra)
+  override def copy(extra: ParamMap): Bucketizer = {
+    defaultCopy[Bucketizer](extra).setParent(parent)
+  }
 }
 
 private[feature] object Bucketizer {

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
index ecde808..9384474 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
@@ -114,6 +114,6 @@ class IDFModel private[ml] (
 
   override def copy(extra: ParamMap): IDFModel = {
     val copied = new IDFModel(uid, idfModel)
-    copyValues(copied, extra)
+    copyValues(copied, extra).setParent(parent)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
index 9a473dd..1b494ec 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
@@ -173,6 +173,6 @@ class MinMaxScalerModel private[ml] (
 
   override def copy(extra: ParamMap): MinMaxScalerModel = {
     val copied = new MinMaxScalerModel(uid, originalMin, originalMax)
-    copyValues(copied, extra)
+    copyValues(copied, extra).setParent(parent)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
index 2d3bb68..5390847 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
@@ -125,6 +125,6 @@ class PCAModel private[ml] (
 
   override def copy(extra: ParamMap): PCAModel = {
     val copied = new PCAModel(uid, pcaModel)
-    copyValues(copied, extra)
+    copyValues(copied, extra).setParent(parent)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
index 72b545e..f6d0b0c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
@@ -136,6 +136,6 @@ class StandardScalerModel private[ml] (
 
   override def copy(extra: ParamMap): StandardScalerModel = {
     val copied = new StandardScalerModel(uid, scaler)
-    copyValues(copied, extra)
+    copyValues(copied, extra).setParent(parent)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
index ebfa972..569c834 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
@@ -150,7 +150,7 @@ class StringIndexerModel private[ml] (
 
   override def copy(extra: ParamMap): StringIndexerModel = {
     val copied = new StringIndexerModel(uid, labels)
-    copyValues(copied, extra)
+    copyValues(copied, extra).setParent(parent)
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
index c73bdcc..6875aef 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
@@ -405,6 +405,6 @@ class VectorIndexerModel private[ml] (
 
   override def copy(extra: ParamMap): VectorIndexerModel = {
     val copied = new VectorIndexerModel(uid, numFeatures, categoryMaps)
-    copyValues(copied, extra)
+    copyValues(copied, extra).setParent(parent)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
index 29acc3e..5af775a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
@@ -221,6 +221,6 @@ class Word2VecModel private[ml] (
 
   override def copy(extra: ParamMap): Word2VecModel = {
     val copied = new Word2VecModel(uid, wordVectors)
-    copyValues(copied, extra)
+    copyValues(copied, extra).setParent(parent)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala 
b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
index 2e44cd4..7db8ad8 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
@@ -219,7 +219,7 @@ class ALSModel private[ml] (
 
   override def copy(extra: ParamMap): ALSModel = {
     val copied = new ALSModel(uid, rank, userFactors, itemFactors)
-    copyValues(copied, extra)
+    copyValues(copied, extra).setParent(parent)
   }
 }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
 
b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
index dc94a14..a2bcd67 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -114,7 +114,7 @@ final class DecisionTreeRegressionModel private[ml] (
   }
 
   override def copy(extra: ParamMap): DecisionTreeRegressionModel = {
-    copyValues(new DecisionTreeRegressionModel(uid, rootNode), extra)
+    copyValues(new DecisionTreeRegressionModel(uid, rootNode), extra).setParent(parent)
   }
 
   override def toString: String = {

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala 
b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
index 5633bc3..b66e61f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
@@ -185,7 +185,7 @@ final class GBTRegressionModel(
   }
 
   override def copy(extra: ParamMap): GBTRegressionModel = {
-    copyValues(new GBTRegressionModel(uid, _trees, _treeWeights), extra)
+    copyValues(new GBTRegressionModel(uid, _trees, _treeWeights), extra).setParent(parent)
   }
 
   override def toString: String = {

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala 
b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 92d819b..884003e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -312,7 +312,7 @@ class LinearRegressionModel private[ml] (
   override def copy(extra: ParamMap): LinearRegressionModel = {
     val newModel = copyValues(new LinearRegressionModel(uid, weights, intercept))
     if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get)
-    newModel
+    newModel.setParent(parent)
   }
 }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
 
b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
index db75c0d..2f36da3 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
@@ -151,7 +151,7 @@ final class RandomForestRegressionModel private[ml] (
   }
 
   override def copy(extra: ParamMap): RandomForestRegressionModel = {
-    copyValues(new RandomForestRegressionModel(uid, _trees, numFeatures), extra)
+    copyValues(new RandomForestRegressionModel(uid, _trees, numFeatures), extra).setParent(parent)
   }
 
   override def toString: String = {

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala 
b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
index f979319..4792eb0 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
@@ -160,6 +160,6 @@ class CrossValidatorModel private[ml] (
       uid,
       bestModel.copy(extra).asInstanceOf[Model[_]],
       avgMetrics.clone())
-    copyValues(copied, extra)
+    copyValues(copied, extra).setParent(parent)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala
index 63d2fa3..1f2c9b7 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala
@@ -26,6 +26,7 @@ import org.scalatest.mock.MockitoSugar.mock
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.feature.HashingTF
 import org.apache.spark.ml.param.ParamMap
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.sql.DataFrame
 
 class PipelineSuite extends SparkFunSuite {
@@ -65,6 +66,8 @@ class PipelineSuite extends SparkFunSuite {
       .setStages(Array(estimator0, transformer1, estimator2, transformer3))
     val pipelineModel = pipeline.fit(dataset0)
 
+    MLTestingUtils.checkCopy(pipelineModel)
+
     assert(pipelineModel.stages.length === 4)
     assert(pipelineModel.stages(0).eq(model0))
     assert(pipelineModel.stages(1).eq(transformer1))

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
index c7bbf1c..4b7c5d3 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
@@ -21,6 +21,7 @@ import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.impl.TreeTests
 import org.apache.spark.ml.param.ParamsSuite
 import org.apache.spark.ml.tree.LeafNode
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.mllib.linalg.{Vector, Vectors}
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.tree.{DecisionTree => OldDecisionTree, DecisionTreeSuite => OldDecisionTreeSuite}
@@ -244,6 +245,9 @@ class DecisionTreeClassifierSuite extends SparkFunSuite with MLlibTestSparkConte
     val newData: DataFrame = TreeTests.setMetadata(rdd, categoricalFeatures, numClasses)
     val newTree = dt.fit(newData)
 
+    // copied model must have the same parent.
+    MLTestingUtils.checkCopy(newTree)
+
     val predictions = newTree.transform(newData)
       .select(newTree.getPredictionCol, newTree.getRawPredictionCol, newTree.getProbabilityCol)
       .collect()

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
index d4b5896..e3909bc 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
@@ -22,6 +22,7 @@ import org.apache.spark.ml.impl.TreeTests
 import org.apache.spark.ml.param.ParamsSuite
 import org.apache.spark.ml.regression.DecisionTreeRegressionModel
 import org.apache.spark.ml.tree.LeafNode
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.tree.{EnsembleTestHelper, GradientBoostedTrees => OldGBT}
 import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo}
@@ -92,6 +93,9 @@ class GBTClassifierSuite extends SparkFunSuite with MLlibTestSparkContext {
       .setCheckpointInterval(2)
     val model = gbt.fit(df)
 
+    // copied model must have the same parent.
+    MLTestingUtils.checkCopy(model)
+
     sc.checkpointDir = None
     Utils.deleteRecursively(tempDir)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index e354e16..cce39f3 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.ml.classification
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.param.ParamsSuite
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.mllib.classification.LogisticRegressionSuite._
 import org.apache.spark.mllib.linalg.{Vectors, Vector}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
@@ -135,6 +136,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
     lr.setFitIntercept(false)
     val model = lr.fit(dataset)
     assert(model.intercept === 0.0)
+
+    // copied model must have the same parent.
+    MLTestingUtils.checkCopy(model)
   }
 
   test("logistic regression with setters") {

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
index bd8e819..977f0e0 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
@@ -21,7 +21,7 @@ import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.attribute.NominalAttribute
 import org.apache.spark.ml.feature.StringIndexer
 import org.apache.spark.ml.param.{ParamMap, ParamsSuite}
-import org.apache.spark.ml.util.MetadataUtils
+import org.apache.spark.ml.util.{MLTestingUtils, MetadataUtils}
 import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
 import org.apache.spark.mllib.classification.LogisticRegressionSuite._
 import org.apache.spark.mllib.evaluation.MulticlassMetrics
@@ -70,6 +70,10 @@ class OneVsRestSuite extends SparkFunSuite with MLlibTestSparkContext {
     assert(ova.getLabelCol === "label")
     assert(ova.getPredictionCol === "prediction")
     val ovaModel = ova.fit(dataset)
+
+    // copied model must have the same parent.
+    MLTestingUtils.checkCopy(ovaModel)
+
     assert(ovaModel.models.size === numClasses)
     val transformedDataset = ovaModel.transform(dataset)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
index 6ca4b5a..b4403ec 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
@@ -21,6 +21,7 @@ import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.impl.TreeTests
 import org.apache.spark.ml.param.ParamsSuite
 import org.apache.spark.ml.tree.LeafNode
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.mllib.linalg.{Vector, Vectors}
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.tree.{EnsembleTestHelper, RandomForest => OldRandomForest}
@@ -135,6 +136,9 @@ class RandomForestClassifierSuite extends SparkFunSuite with MLlibTestSparkConte
     val df: DataFrame = TreeTests.setMetadata(rdd, categoricalFeatures, numClasses)
     val model = rf.fit(df)
 
+    // copied model must have the same parent.
+    MLTestingUtils.checkCopy(model)
+
     val predictions = model.transform(df)
       .select(rf.getPredictionCol, rf.getRawPredictionCol, rf.getProbabilityCol)
       .collect()

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala
index ec85e0d..0eba34f 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala
@@ -21,6 +21,7 @@ import scala.util.Random
 
 import org.apache.spark.{SparkException, SparkFunSuite}
 import org.apache.spark.ml.param.ParamsSuite
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.mllib.util.TestingUtils._

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala
index c452054..c04dda4 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.ml.feature
 
 import org.apache.spark.SparkFunSuite
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.mllib.linalg.{Vector, Vectors}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.sql.{Row, SQLContext}
@@ -51,6 +52,9 @@ class MinMaxScalerSuite extends SparkFunSuite with MLlibTestSparkContext {
       .foreach { case Row(vector1: Vector, vector2: Vector) =>
         assert(vector1.equals(vector2), "Transformed vector is different with expected.")
     }
+
+    // copied model must have the same parent.
+    MLTestingUtils.checkCopy(model)
   }
 
   test("MinMaxScaler arguments max must be larger than min") {

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/feature/PCASuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/PCASuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/feature/PCASuite.scala
index d0ae36b..30c500f 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/PCASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/PCASuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.ml.feature
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.param.ParamsSuite
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.mllib.linalg.distributed.RowMatrix
 import org.apache.spark.mllib.linalg.{Vector, Vectors, DenseMatrix, Matrices}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
@@ -56,6 +57,9 @@ class PCASuite extends SparkFunSuite with MLlibTestSparkContext {
       .setK(3)
       .fit(df)
 
+    // copied model must have the same parent.
+    MLTestingUtils.checkCopy(pca)
+
     pca.transform(df).select("pca_features", "expected").collect().foreach {
       case Row(x: Vector, y: Vector) =>
         assert(x ~== y absTol 1e-5, "Transformed vector is different with expected vector.")

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala
index d0295a0..124af62 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.ml.feature
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.attribute.{Attribute, NominalAttribute}
 import org.apache.spark.ml.param.ParamsSuite
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.mllib.util.MLlibTestSparkContext
 
 class StringIndexerSuite extends SparkFunSuite with MLlibTestSparkContext {
@@ -37,6 +38,10 @@ class StringIndexerSuite extends SparkFunSuite with MLlibTestSparkContext {
       .setInputCol("label")
       .setOutputCol("labelIndex")
       .fit(df)
+
+    // copied model must have the same parent.
+    MLTestingUtils.checkCopy(indexer)
+
     val transformed = indexer.transform(df)
     val attr = Attribute.fromStructField(transformed.schema("labelIndex"))
       .asInstanceOf[NominalAttribute]

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala
index 03120c8..8cb0a2c 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala
@@ -22,6 +22,7 @@ import scala.beans.{BeanInfo, BeanProperty}
 import org.apache.spark.{Logging, SparkException, SparkFunSuite}
 import org.apache.spark.ml.attribute._
 import org.apache.spark.ml.param.ParamsSuite
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.mllib.linalg.{SparseVector, Vector, Vectors}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.rdd.RDD
@@ -109,6 +110,10 @@ class VectorIndexerSuite extends SparkFunSuite with MLlibTestSparkContext with L
   test("Throws error when given RDDs with different size vectors") {
     val vectorIndexer = getIndexer
     val model = vectorIndexer.fit(densePoints1) // vectors of length 3
+
+    // copied model must have the same parent.
+    MLTestingUtils.checkCopy(model)
+
     model.transform(densePoints1) // should work
     model.transform(sparsePoints1) // should work
     intercept[SparkException] {

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala
index adcda0e..a2e46f2 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.ml.feature
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.param.ParamsSuite
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.mllib.linalg.{Vector, Vectors}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.mllib.util.TestingUtils._
@@ -62,6 +63,9 @@ class Word2VecSuite extends SparkFunSuite with MLlibTestSparkContext {
       .setSeed(42L)
       .fit(docDF)
 
+    // copied model must have the same parent.
+    MLTestingUtils.checkCopy(model)
+
     model.transform(docDF).select("result", "expected").collect().foreach {
       case Row(vector1: Vector, vector2: Vector) =>
         assert(vector1 ~== vector2 absTol 1E-5, "Transformed vector is different with expected.")

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
index 2e5cfe7..eadc80e 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
@@ -28,6 +28,7 @@ import com.github.fommil.netlib.BLAS.{getInstance => blas}
 
 import org.apache.spark.{Logging, SparkException, SparkFunSuite}
 import org.apache.spark.ml.recommendation.ALS._
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.mllib.util.TestingUtils._
@@ -374,6 +375,9 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext with Logging {
       }
     logInfo(s"Test RMSE is $rmse.")
     assert(rmse < targetRMSE)
+
+    // copied model must have the same parent.
+    MLTestingUtils.checkCopy(model)
   }
 
   test("exact rank-1 matrix") {

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala
index 33aa9d0..b092bcd 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.ml.regression
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.impl.TreeTests
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.tree.{DecisionTree => OldDecisionTree,
   DecisionTreeSuite => OldDecisionTreeSuite}
@@ -61,6 +62,16 @@ class DecisionTreeRegressorSuite extends SparkFunSuite with MLlibTestSparkContex
     compareAPIs(categoricalDataPointsRDD, dt, categoricalFeatures)
   }
 
+  test("copied model must have the same parent") {
+    val categoricalFeatures = Map(0 -> 2, 1-> 2)
+    val df = TreeTests.setMetadata(categoricalDataPointsRDD, categoricalFeatures, numClasses = 0)
+    val model = new DecisionTreeRegressor()
+      .setImpurity("variance")
+      .setMaxDepth(2)
+      .setMaxBins(8).fit(df)
+    MLTestingUtils.checkCopy(model)
+  }
+
   /////////////////////////////////////////////////////////////////////////////
   // Tests of model save/load
   /////////////////////////////////////////////////////////////////////////////

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala
index dbdce0c..a68197b 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.ml.regression
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.impl.TreeTests
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.tree.{EnsembleTestHelper, GradientBoostedTrees => OldGBT}
@@ -82,6 +83,9 @@ class GBTRegressorSuite extends SparkFunSuite with MLlibTestSparkContext {
       .setMaxDepth(2)
       .setMaxIter(2)
     val model = gbt.fit(df)
+
+    // copied model must have the same parent.
+    MLTestingUtils.checkCopy(model)
     val preds = model.transform(df)
     val predictions = preds.select("prediction").map(_.getDouble(0))
     // Checks based on SPARK-8736 (to ensure it is not doing classification)
@@ -104,6 +108,7 @@ class GBTRegressorSuite extends SparkFunSuite with MLlibTestSparkContext {
 
     sc.checkpointDir = None
     Utils.deleteRecursively(tempDir)
+
   }
 
   // TODO: Reinstate test once runWithValidation is implemented  SPARK-7132

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
index 21ad822..2aaee71 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.ml.regression
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.param.ParamsSuite
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.mllib.linalg.{DenseVector, Vectors}
 import org.apache.spark.mllib.util.{LinearDataGenerator, MLlibTestSparkContext}
 import org.apache.spark.mllib.util.TestingUtils._
@@ -72,6 +73,10 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
     assert(lir.getFitIntercept)
     assert(lir.getStandardization)
     val model = lir.fit(dataset)
+
+    // copied model must have the same parent.
+    MLTestingUtils.checkCopy(model)
+
     model.transform(dataset)
       .select("label", "prediction")
       .collect()

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala
index 992ce95..7b1b3f1 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.ml.regression
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.impl.TreeTests
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.tree.{EnsembleTestHelper, RandomForest => OldRandomForest}
@@ -91,7 +92,11 @@ class RandomForestRegressorSuite extends SparkFunSuite with MLlibTestSparkContex
     val categoricalFeatures = Map.empty[Int, Int]
     val df: DataFrame = TreeTests.setMetadata(data, categoricalFeatures, 0)
 
-    val importances = rf.fit(df).featureImportances
+    val model = rf.fit(df)
+
+    // copied model must have the same parent.
+    MLTestingUtils.checkCopy(model)
+    val importances = model.featureImportances
     val mostImportantFeature = importances.argmax
     assert(mostImportantFeature === 1)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala
index db64511..aaca08b 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.ml.tuning
 
 import org.apache.spark.SparkFunSuite
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.ml.{Estimator, Model}
 import org.apache.spark.ml.classification.LogisticRegression
 import org.apache.spark.ml.evaluation.{BinaryClassificationEvaluator, Evaluator, RegressionEvaluator}
@@ -53,6 +54,10 @@ class CrossValidatorSuite extends SparkFunSuite with MLlibTestSparkContext {
       .setEvaluator(eval)
       .setNumFolds(3)
     val cvModel = cv.fit(dataset)
+
+    // copied model must have the same parent.
+    MLTestingUtils.checkCopy(cvModel)
+
     val parent = cvModel.bestModel.parent.asInstanceOf[LogisticRegression]
     assert(parent.getRegParam === 0.001)
     assert(parent.getMaxIter === 10)

http://git-wip-us.apache.org/repos/asf/spark/blob/fe05142f/mllib/src/test/scala/org/apache/spark/ml/util/MLTestingUtils.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/util/MLTestingUtils.scala 
b/mllib/src/test/scala/org/apache/spark/ml/util/MLTestingUtils.scala
new file mode 100644
index 0000000..d290cc9
--- /dev/null
+++ b/mllib/src/test/scala/org/apache/spark/ml/util/MLTestingUtils.scala
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.util
+
+import org.apache.spark.ml.Model
+import org.apache.spark.ml.param.ParamMap
+
+object MLTestingUtils {
+  def checkCopy(model: Model[_]): Unit = {
+    val copied = model.copy(ParamMap.empty)
+      .asInstanceOf[Model[_]]
+    assert(copied.parent.uid == model.parent.uid)
+    assert(copied.parent == model.parent)
+  }
+}

