Github user jkbradley commented on a diff in the pull request:
https://github.com/apache/spark/pull/20829#discussion_r177542280
--- Diff:
mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala ---
@@ -136,34 +181,106 @@ class VectorAssembler @Since("1.4.0")
(@Since("1.4.0") override val uid: String)
@Since("1.6.0")
object VectorAssembler extends DefaultParamsReadable[VectorAssembler] {
+ private[feature] val SKIP_INVALID: String = "skip"
+ private[feature] val ERROR_INVALID: String = "error"
+ private[feature] val KEEP_INVALID: String = "keep"
+ private[feature] val supportedHandleInvalids: Array[String] =
+ Array(SKIP_INVALID, ERROR_INVALID, KEEP_INVALID)
+
+ /**
+ * Infers lengths of vector columns from the first row of the dataset
+ * @param dataset the dataset
+ * @param columns name of vector columns whose lengths need to be inferred
+ * @return map of column names to lengths
+ */
+ private[feature] def getVectorLengthsFromFirstRow(
+ dataset: Dataset[_], columns: Seq[String]): Map[String, Int] = {
--- End diff --
Scala style: for multiline class and method headers, put the first argument
on the next line, indented by 4 spaces.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org