Repository: spark
Updated Branches:
  refs/heads/branch-2.1 820847008 -> 6b6eb4e52


[SPARK-18434][ML] Add missing ParamValidations for ML algos

## What changes were proposed in this pull request?
Add missing ParamValidations for ML algos

## How was this patch tested?
existing tests

Author: Zheng RuiFeng <[email protected]>

Closes #15881 from zhengruifeng/arg_checking.

(cherry picked from commit c68f1a38af67957ee28889667193da8f64bb4342)
Signed-off-by: Yanbo Liang <[email protected]>
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6b6eb4e5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6b6eb4e5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6b6eb4e5

Branch: refs/heads/branch-2.1
Commit: 6b6eb4e520d07a27aa68d3450f3c7613b233d928
Parents: 8208470
Author: Zheng RuiFeng <[email protected]>
Authored: Wed Nov 16 02:46:27 2016 -0800
Committer: Yanbo Liang <[email protected]>
Committed: Wed Nov 16 02:46:54 2016 -0800

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/ml/feature/IDF.scala  |  3 ++-
 .../main/scala/org/apache/spark/ml/feature/PCA.scala  |  3 ++-
 .../scala/org/apache/spark/ml/feature/Word2Vec.scala  | 13 ++++++++-----
 .../spark/ml/regression/IsotonicRegression.scala      |  3 ++-
 .../apache/spark/ml/regression/LinearRegression.scala |  6 +++++-
 .../scala/org/apache/spark/ml/tree/treeParams.scala   |  4 +++-
 6 files changed, 22 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/6b6eb4e5/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
index 6386dd8..46a0730 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
@@ -44,7 +44,8 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol
    * @group param
    */
   final val minDocFreq = new IntParam(
-    this, "minDocFreq", "minimum number of documents in which a term should appear for filtering")
+    this, "minDocFreq", "minimum number of documents in which a term should appear for filtering" +
+      " (>= 0)", ParamValidators.gtEq(0))
 
   setDefault(minDocFreq -> 0)

http://git-wip-us.apache.org/repos/asf/spark/blob/6b6eb4e5/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
index 6b91348..444006f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
@@ -44,7 +44,8 @@ private[feature] trait PCAParams extends Params with HasInputCol with HasOutputC
    * The number of principal components.
    * @group param
    */
-  final val k: IntParam = new IntParam(this, "k", "the number of principal components")
+  final val k: IntParam = new IntParam(this, "k", "the number of principal components (> 0)",
+    ParamValidators.gt(0))
 
   /** @group getParam */
   def getK: Int = $(k)

http://git-wip-us.apache.org/repos/asf/spark/blob/6b6eb4e5/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
index d53f3df..3ed08c9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
@@ -43,7 +43,8 @@ private[feature] trait Word2VecBase extends Params
    * @group param
    */
   final val vectorSize = new IntParam(
-    this, "vectorSize", "the dimension of codes after transforming from words")
+    this, "vectorSize", "the dimension of codes after transforming from words (> 0)",
+    ParamValidators.gt(0))
 
   setDefault(vectorSize -> 100)
 
   /** @group getParam */
@@ -55,7 +56,8 @@ private[feature] trait Word2VecBase extends Params
    * @group expertParam
    */
   final val windowSize = new IntParam(
-    this, "windowSize", "the window size (context words from [-window, window])")
+    this, "windowSize", "the window size (context words from [-window, window]) (> 0)",
+    ParamValidators.gt(0))
 
   setDefault(windowSize -> 5)
 
   /** @group expertGetParam */
@@ -67,7 +69,8 @@ private[feature] trait Word2VecBase extends Params
    * @group param
    */
   final val numPartitions = new IntParam(
-    this, "numPartitions", "number of partitions for sentences of words")
+    this, "numPartitions", "number of partitions for sentences of words (> 0)",
+    ParamValidators.gt(0))
 
   setDefault(numPartitions -> 1)
 
   /** @group getParam */
@@ -80,7 +83,7 @@ private[feature] trait Word2VecBase extends Params
    * @group param
    */
   final val minCount = new IntParam(this, "minCount", "the minimum number of times a token must " +
-    "appear to be included in the word2vec model's vocabulary")
+    "appear to be included in the word2vec model's vocabulary (>= 0)", ParamValidators.gtEq(0))
 
   setDefault(minCount -> 5)
 
   /** @group getParam */
@@ -95,7 +98,7 @@ private[feature] trait Word2VecBase extends Params
    */
   final val maxSentenceLength = new IntParam(this, "maxSentenceLength", "Maximum length " +
     "(in words) of each sentence in the input data. Any sentence longer than this threshold will " +
-    "be divided into chunks up to the size.")
+    "be divided into chunks up to the size (> 0)", ParamValidators.gt(0))
 
   setDefault(maxSentenceLength -> 1000)
 
   /** @group getParam */

http://git-wip-us.apache.org/repos/asf/spark/blob/6b6eb4e5/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
index cd7b4f2..4d274f3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
@@ -61,7 +61,8 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
    * @group param
    */
   final val featureIndex: IntParam = new IntParam(this, "featureIndex",
-    "The index of the feature if featuresCol is a vector column, no effect otherwise.")
+    "The index of the feature if featuresCol is a vector column, no effect otherwise (>= 0)",
+    ParamValidators.gtEq(0))
 
   /** @group getParam */
   final def getFeatureIndex: Int = $(featureIndex)

http://git-wip-us.apache.org/repos/asf/spark/blob/6b6eb4e5/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 9639b07..71c542a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -171,7 +171,11 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String
    * @group setParam
    */
   @Since("1.6.0")
-  def setSolver(value: String): this.type = set(solver, value)
+  def setSolver(value: String): this.type = {
+    require(Set("auto", "l-bfgs", "normal").contains(value),
+      s"Solver $value was not supported. Supported options: auto, l-bfgs, normal")
+    set(solver, value)
+  }
 
   setDefault(solver -> "auto")
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/6b6eb4e5/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
index 57c7e44..5a55153 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
@@ -73,11 +73,13 @@ private[ml] trait DecisionTreeParams extends PredictorParams
   /**
    * Minimum information gain for a split to be considered at a tree node.
+   * Should be >= 0.0.
    * (default = 0.0)
    * @group param
    */
   final val minInfoGain: DoubleParam = new DoubleParam(this, "minInfoGain",
-    "Minimum information gain for a split to be considered at a tree node.")
+    "Minimum information gain for a split to be considered at a tree node.",
+    ParamValidators.gtEq(0.0))
 
   /**
    * Maximum memory in MB allocated to histogram aggregation. If too small, then 1 node will be

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
