Github user WeichenXu123 commented on a diff in the pull request:
https://github.com/apache/spark/pull/20829#discussion_r174990264
--- Diff:
mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala ---
@@ -49,32 +51,65 @@ class VectorAssembler @Since("1.4.0") (@Since("1.4.0")
override val uid: String)
@Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
+ /** @group setParam */
+ @Since("1.6.0")
+ def setHandleInvalid(value: String): this.type = set(handleInvalid,
value)
+
+ /**
+ * Param for how to handle invalid data (NULL values). Options are
'skip' (filter out rows with
+ * invalid data), 'error' (throw an error), or 'keep' (return relevant
number of NaN in the
+ * output).
+ * Default: "error"
+ * @group param
+ */
+ @Since("1.6.0")
+ override val handleInvalid: Param[String] = new Param[String](this,
"handleInvalid",
+ "Hhow to handle invalid data (NULL values). Options are 'skip' (filter
out rows with " +
+ "invalid data), 'error' (throw an error), or 'keep' (return relevant
number of NaN " +
+ "in the * output).",
ParamValidators.inArray(VectorAssembler.supportedHandleInvalids))
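--- End diff --
Typo in the param description string: "in the * output" -> "in the output" (the stray "*" looks like it was carried over from the Scaladoc comment's line prefix).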
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]