Github user mengxr commented on a diff in the pull request:
https://github.com/apache/spark/pull/9839#discussion_r45439149
--- Diff:
mllib/src/test/scala/org/apache/spark/ml/feature/StandardScalerSuite.scala ---
@@ -116,23 +116,19 @@ class StandardScalerSuite extends SparkFunSuite with
MLlibTestSparkContext
assertResult(standardScaler3.transform(df3))
}
- test("StandardScaler read/write") {
- val t = new StandardScaler()
- .setInputCol("myInputCol")
- .setOutputCol("myOutputCol")
- .setWithStd(false)
- .setWithMean(true)
- testDefaultReadWrite(t)
- }
-
- test("StandardScalerModel read/write") {
- val oldModel = new feature.StandardScalerModel(
- Vectors.dense(1.0, 2.0), Vectors.dense(3.0, 4.0), false, true)
- val instance = new StandardScalerModel("myStandardScalerModel",
oldModel)
- val newInstance = testDefaultReadWrite(instance)
- assert(newInstance.std === instance.std)
- assert(newInstance.mean === instance.mean)
- assert(newInstance.getWithStd === instance.getWithStd)
- assert(newInstance.getWithMean === instance.getWithMean)
+ test("read/write") {
+ def checkModelData(model1: StandardScalerModel, model2:
StandardScalerModel): Unit = {
+ assert(model1.mean === model2.mean)
+ assert(model1.std === model2.std)
+ }
+ val allParams: Map[String, Any] = Map(
+ "inputCol" -> "features",
+ "outputCol" -> "standardized_features",
+ "withMean" -> true,
+ "withStd" -> true
+ )
+ val df =
sqlContext.createDataFrame(data.zip(resWithBoth)).toDF("features", "expected")
+ val standardScaler = new StandardScaler()
+ testEstimatorAndModelReadWrite(standardScaler, df, allParams,
checkModelData)
}
--- End diff --
I don't think this is an ideal unit test for read/write: model fitting shouldn't
be part of it, because fitting is already covered by other tests. Constructing
the estimator and the model directly would save some test time.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]