srowen commented on a change in pull request #26480: [SPARK-29808][ML][PYTHON] StopWordsRemover should support multi-cols
URL: https://github.com/apache/spark/pull/26480#discussion_r345219589
 
 

 ##########
 File path: mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
 ##########
 @@ -142,16 +165,40 @@ class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String
         terms.filter(s => !lowerStopWords.contains(toLower(s)))
       }
     }
-    val metadata = outputSchema($(outputCol)).metadata
-    dataset.select(col("*"), t(col($(inputCol))).as($(outputCol), metadata))
+
+    val (inputColNames, outputColNames) = getInOutCols()
+    val ouputCols = inputColNames.map { inputColName =>
+      t(col(inputColName))
+    }
+    val ouputMetadata = outputColNames.map(outputSchema(_).metadata)
+    dataset.withColumns(outputColNames, ouputCols, ouputMetadata)
   }
 
   @Since("1.5.0")
   override def transformSchema(schema: StructType): StructType = {
-    val inputType = schema($(inputCol)).dataType
-    require(inputType.sameType(ArrayType(StringType)), "Input type must be " +
-      s"${ArrayType(StringType).catalogString} but got 
${inputType.catalogString}.")
-    SchemaUtils.appendColumn(schema, $(outputCol), inputType, 
schema($(inputCol)).nullable)
+    ParamValidators.checkSingleVsMultiColumnParams(this, Seq(outputCol),
+      Seq(outputCols))
+
+    if (isSet(inputCols)) {
+      require(getInputCols.length == getOutputCols.length,
+        s"StopWordsRemover $this has mismatched Params " +
+          s"for multi-column transform. Params (inputCols, outputCols) should 
have " +
 
 Review comment:
   Nit: you don't need interpolation on these two lines.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to