Github user mengxr commented on a diff in the pull request:
https://github.com/apache/spark/pull/21493#discussion_r192910421
--- Diff:
mllib/src/test/scala/org/apache/spark/ml/clustering/PowerIterationClusteringSuite.scala
---
@@ -62,136 +61,82 @@ class PowerIterationClusteringSuite extends
SparkFunSuite
new PowerIterationClustering().setInitMode("no_such_a_mode")
}
intercept[IllegalArgumentException] {
- new PowerIterationClustering().setIdCol("")
+ new PowerIterationClustering().setSrcCol("")
}
intercept[IllegalArgumentException] {
- new PowerIterationClustering().setNeighborsCol("")
- }
- intercept[IllegalArgumentException] {
- new PowerIterationClustering().setSimilaritiesCol("")
+ new PowerIterationClustering().setDstCol("")
}
}
test("power iteration clustering") {
val n = n1 + n2
- val model = new PowerIterationClustering()
+ val result = new PowerIterationClustering()
.setK(2)
.setMaxIter(40)
- val result = model.transform(data)
-
- val predictions = Array.fill(2)(mutable.Set.empty[Long])
- result.select("id", "prediction").collect().foreach {
- case Row(id: Long, cluster: Integer) => predictions(cluster) += id
- }
- assert(predictions.toSet == Set((1 until n1).toSet, (n1 until
n).toSet))
+ .setWeightCol("weight")
+ .assignClusters(data).as[(Long, Int)].collect().toSet
--- End diff --
It is better to split a long chain of method calls.
~~~scala
val assignments = new ...
...
.assignClusters(...)
val localAssignments = assignments
.select('id, 'cluster) // need this since we didn't put a contract on column order
.as[(Long, Int)]
.collect()
.toSet
~~~
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]