Github user jkbradley commented on a diff in the pull request:
https://github.com/apache/spark/pull/20686#discussion_r173582122
--- Diff:
mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala ---
@@ -90,23 +96,29 @@ class OneHotEncoderSuite
val encoder = new OneHotEncoder()
.setInputCol("size")
.setOutputCol("encoded")
- val output = encoder.transform(df)
- val group = AttributeGroup.fromStructField(output.schema("encoded"))
- assert(group.size === 2)
- assert(group.getAttr(0) ===
BinaryAttribute.defaultAttr.withName("small").withIndex(0))
- assert(group.getAttr(1) ===
BinaryAttribute.defaultAttr.withName("medium").withIndex(1))
+ testTransformerByGlobalCheckFunc[(Double)](df, encoder, "encoded") {
rows =>
+ val group =
AttributeGroup.fromStructField(rows.head.schema("encoded"))
+ assert(group.size === 2)
+ assert(group.getAttr(0) ===
BinaryAttribute.defaultAttr.withName("small").withIndex(0))
+ assert(group.getAttr(1) ===
BinaryAttribute.defaultAttr.withName("medium").withIndex(1))
+ }
}
- test("input column without ML attribute") {
+
+ ignore("input column without ML attribute") {
--- End diff --
Let's keep the test, but limit it to batch mode rather than ignoring it.
People should switch to OneHotEncoderEstimator anyway.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]