Github user viirya commented on a diff in the pull request:
https://github.com/apache/spark/pull/20442#discussion_r164982190
--- Diff:
mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala ---
@@ -167,25 +167,36 @@ final class QuantileDiscretizer @Since("1.6.0")
(@Since("1.6.0") override val ui
@Since("2.3.0")
def setOutputCols(value: Array[String]): this.type = set(outputCols,
value)
- private[feature] def getInOutCols: (Array[String], Array[String]) = {
- require((isSet(inputCol) && isSet(outputCol) && !isSet(inputCols) &&
!isSet(outputCols)) ||
- (!isSet(inputCol) && !isSet(outputCol) && isSet(inputCols) &&
isSet(outputCols)),
- "QuantileDiscretizer only supports setting either inputCol/outputCol
or" +
- "inputCols/outputCols."
- )
+ @Since("1.6.0")
+ override def transformSchema(schema: StructType): StructType = {
+ ParamValidators.checkSingleVsMultiColumnParams(this, Seq(outputCol),
+ Seq(outputCols))
if (isSet(inputCol)) {
- (Array($(inputCol)), Array($(outputCol)))
- } else {
- require($(inputCols).length == $(outputCols).length,
- "inputCols number do not match outputCols")
- ($(inputCols), $(outputCols))
+ require(!isSet(numBucketsArray),
+ s"numBucketsArray can't be set for single-column
QuantileDiscretizer.")
--- End diff --
Should we also check whether `numBucketsArray` and `numBuckets` are both set
at the same time?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]