Github user jkbradley commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19627#discussion_r180610868
  
    --- Diff: python/pyspark/ml/tests.py ---
    @@ -1186,6 +1228,38 @@ def test_parallel_evaluation(self):
             tvsParallelModel = tvs.fit(dataset)
             self.assertEqual(tvsSerialModel.validationMetrics, tvsParallelModel.validationMetrics)
     
    +    def test_expose_sub_models(self):
    +        temp_path = tempfile.mkdtemp()
    +        dataset = self.spark.createDataFrame(
    +            [(Vectors.dense([0.0]), 0.0),
    +             (Vectors.dense([0.4]), 1.0),
    +             (Vectors.dense([0.5]), 0.0),
    +             (Vectors.dense([0.6]), 1.0),
    +             (Vectors.dense([1.0]), 1.0)] * 10,
    +            ["features", "label"])
    +        lr = LogisticRegression()
    +        grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
    +        evaluator = BinaryClassificationEvaluator()
    +        tvs = TrainValidationSplit(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator,
    +                                   collectSubModels=True)
    +        tvsModel = tvs.fit(dataset)
    +        assert len(tvsModel.subModels) == len(grid)
    --- End diff --
    
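    Use `self.assertEqual` instead of a bare `assert` here and elsewhere in this test, e.g. `self.assertEqual(len(tvsModel.subModels), len(grid))`.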


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to