Github user jkbradley commented on a diff in the pull request:
https://github.com/apache/spark/pull/21081#discussion_r181847061
--- Diff: mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
---
@@ -144,7 +156,12 @@ class KMeansModel private[ml] (
// TODO: Replace the temp fix when we have proper evaluators defined for
clustering.
@Since("2.0.0")
def computeCost(dataset: Dataset[_]): Double = {
- SchemaUtils.checkColumnType(dataset.schema, $(featuresCol), new
VectorUDT)
+ val typeCandidates = List( new VectorUDT,
+ new ArrayType(DoubleType, true),
+ new ArrayType(DoubleType, false),
+ new ArrayType(FloatType, true),
+ new ArrayType(FloatType, false))
+ SchemaUtils.checkColumnTypes(dataset.schema, $(featuresCol),
typeCandidates)
val data: RDD[OldVector] = dataset.select(col($(featuresCol))).rdd.map
{
--- End diff --
This line still won't accept non-Vector types, though; a unit test would catch that.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org