GitHub user BryanCutler commented on a diff in the pull request:
https://github.com/apache/spark/pull/20627#discussion_r169156415
--- Diff: python/pyspark/ml/tests.py ---
@@ -541,6 +541,16 @@ def test_java_params(self):
self.assertEqual(evaluator._java_obj.getMetricName(), "r2")
self.assertEqual(evaluatorCopy._java_obj.getMetricName(), "mae")
+ def test_clustering_evaluator_with_cosine_distance(self):
+ featureAndPredictions = map(lambda x: (Vectors.dense(x[0]), x[1]),
+ [([1.0, 1.0], 1.0), ([10.0, 10.0],
1.0), ([1.0, 0.5], 2.0),
+ ([10.0, 4.4], 2.0), ([-1.0, 1.0],
3.0), ([-100.0, 90.0], 3.0)])
+ dataset = self.spark.createDataFrame(featureAndPredictions,
["features", "prediction"])
+ evaluator = ClusteringEvaluator(predictionCol="prediction",
distanceMeasure="cosine")
+ self.assertEqual(evaluator.getDistanceMeasure(), "cosine")
+ self.assertEqual(round(evaluator.evaluate(dataset), 5), 0.99267)
--- End diff --
It would be better to use `np.allclose` rather than rounding. Check out
some other tests here for the usage.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]