Github user zhengruifeng commented on a diff in the pull request: https://github.com/apache/spark/pull/21563#discussion_r195618344 --- Diff: mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringEvaluator.scala --- @@ -107,15 +106,18 @@ class ClusteringEvaluator @Since("2.3.0") (@Since("2.3.0") override val uid: Str @Since("2.3.0") override def evaluate(dataset: Dataset[_]): Double = { - SchemaUtils.checkColumnType(dataset.schema, $(featuresCol), new VectorUDT) + SchemaUtils.validateVectorCompatibleColumn(dataset.schema, $(featuresCol)) SchemaUtils.checkNumericType(dataset.schema, $(predictionCol)) + val vectorCol = DatasetUtils.columnToVector(dataset, $(featuresCol)) + val df = dataset.select(col($(predictionCol)), --- End diff -- @mgaido91 Thanks for your review! I have considered this; however, there is a problem: if we want to attach metadata to the transformed column (e.g. by using the method `.as(alias: String, metadata: Metadata)`) in `DatasetUtils.columnToVector`, how can we get the name of the transformed column? The only way I know to do this is: ``` val metadata = ... val vectorCol = .. val vectorName = dataset.select(vectorCol) .schema.head.name vectorCol.as(vectorName, metadata) ```
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org