zhengruifeng commented on a change in pull request #20732: [SPARK-23578][ML]
Add multicolumn support for Binarizer
URL: https://github.com/apache/spark/pull/20732#discussion_r327004881
##########
File path: mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
##########
@@ -45,66 +47,117 @@ final class Binarizer @Since("1.4.0") (@Since("1.4.0")
override val uid: String)
* The features greater than the threshold, will be binarized to 1.0.
* The features equal to or less than the threshold, will be binarized to
0.0.
* Default: 0.0
+ *
* @group param
*/
@Since("1.4.0")
val threshold: DoubleParam =
- new DoubleParam(this, "threshold", "threshold used to binarize continuous
features")
+ new DoubleParam(this, "threshold", "threshold used to binarize continuous
features")
+
+ /** @group param */
+ @Since("2.3.1")
+ val thresholds: DoubleArrayParam =
+ new DoubleArrayParam(this, "thresholds", "thresholds used to binarize
continuous features")
/** @group getParam */
@Since("1.4.0")
def getThreshold: Double = $(threshold)
+ /** @group getParam */
+ @Since("2.3.1")
+ def getThresholds: Array[Double] = $(thresholds)
+
/** @group setParam */
@Since("1.4.0")
def setThreshold(value: Double): this.type = set(threshold, value)
setDefault(threshold -> 0.0)
+ /** @group setParam */
+ @Since("2.3.1")
+ def setThresholds(value: Array[Double]): this.type = set(thresholds, value)
+
/** @group setParam */
@Since("1.4.0")
def setInputCol(value: String): this.type = set(inputCol, value)
+ /** @group setParam */
+ @Since("2.3.1")
+ def setInputCols(value: Array[String]): this.type = set(inputCols, value)
+
/** @group setParam */
@Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
+ @Since("2.3.1")
+ def setOutputCols(value: Array[String]): this.type = set(outputCols, value)
+
+ @Since("2.3.1")
+ private[feature] def isBinarizerMultipleColumns(): Boolean = {
+ if (isSet(inputCols) && isSet(inputCol)) {
+ logWarning("Both `inputCol` and `inputCols` are set, we ignore
`inputCols` and this " +
Review comment:
We cannot set both `inputCol` and `inputCols`, according to the current ML convention.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]