Github user MLnick commented on a diff in the pull request:
https://github.com/apache/spark/pull/19993#discussion_r163562784
--- Diff:
mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala ---
@@ -401,15 +390,24 @@ class BucketizerSuite extends SparkFunSuite with
MLlibTestSparkContext with Defa
}
}
- test("Both inputCol and inputCols are set") {
- val bucket = new Bucketizer()
- .setInputCol("feature1")
- .setOutputCol("result")
- .setSplits(Array(-0.5, 0.0, 0.5))
- .setInputCols(Array("feature1", "feature2"))
-
- // When both are set, we ignore `inputCols` and just map the column
specified by `inputCol`.
- assert(bucket.isBucketizeMultipleColumns() == false)
+ test("assert exception is thrown if both multi-column and single-column
params are set") {
+ val df = Seq((0.5, 0.3), (0.5, -0.4)).toDF("feature1", "feature2")
+ ParamsSuite.testExclusiveParams(new Bucketizer, df, ("inputCol",
"feature1"),
+ ("inputCols", Array("feature1", "feature2")))
+ ParamsSuite.testExclusiveParams(new Bucketizer, df, ("inputCol",
"feature1"),
+ ("outputCol", "result1"), ("splits", Array(-0.5, 0.0, 0.5)),
+ ("outputCols", Array("result1", "result2")))
+ ParamsSuite.testExclusiveParams(new Bucketizer, df, ("inputCol",
"feature1"),
+ ("outputCol", "result1"), ("splits", Array(-0.5, 0.0, 0.5)),
+ ("splitsArray", Array(Array(-0.5, 0.0, 0.5), Array(-0.5, 0.0, 0.5))))
+
+ // this should fail because at least one of inputCol and inputCols
must be set
+ ParamsSuite.testExclusiveParams(new Bucketizer, df, ("outputCol",
"feature1"),
+ ("splits", Array(-0.5, 0.0, 0.5)))
+
+ // the following should fail because not all the params are set
--- End diff --
Technically, we should probably also test the `inputCols` + `outputCols`
case here (i.e. that setting both without also setting `splitsArray`
throws an exception as well).
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]