Github user BryanCutler commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20627#discussion_r169156415
  
    --- Diff: python/pyspark/ml/tests.py ---
    @@ -541,6 +541,16 @@ def test_java_params(self):
             self.assertEqual(evaluator._java_obj.getMetricName(), "r2")
             self.assertEqual(evaluatorCopy._java_obj.getMetricName(), "mae")
     
    +    def test_clustering_evaluator_with_cosine_distance(self):
    +        featureAndPredictions = map(lambda x: (Vectors.dense(x[0]), x[1]),
    +                                    [([1.0, 1.0], 1.0), ([10.0, 10.0], 1.0), ([1.0, 0.5], 2.0),
    +                                     ([10.0, 4.4], 2.0), ([-1.0, 1.0], 3.0), ([-100.0, 90.0], 3.0)])
    +        dataset = self.spark.createDataFrame(featureAndPredictions, ["features", "prediction"])
    +        evaluator = ClusteringEvaluator(predictionCol="prediction", distanceMeasure="cosine")
    +        self.assertEqual(evaluator.getDistanceMeasure(), "cosine")
    +        self.assertEqual(round(evaluator.evaluate(dataset), 5),  0.99267)
    --- End diff ---
    
    It would be better to use `np.allclose` rather than rounding. Check out some other tests in this file for the usage.
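    
    For example, the last assertion above could become something along these lines (just a sketch; it assumes numpy is imported as `np`, as elsewhere in tests.py, and the tolerance is illustrative):
    
    ```python
    # compare within a tolerance instead of rounding the score;
    # `np` is assumed to be numpy imported at the top of tests.py
    self.assertTrue(np.allclose(evaluator.evaluate(dataset), 0.99267, atol=1e-5))
    ```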


---
