Github user MLnick commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21465#discussion_r194143768
  
    --- Diff: python/pyspark/ml/classification.py ---
    @@ -1251,26 +1256,33 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, 
HasLabelCol, HasPredictionCol
                          "the contribution of each estimator.",
                          typeConverter=TypeConverters.toFloat)
     
    +    validationTol = Param(Params._dummy(), "validationTol",
    +                          "Threshold for stopping early when fit with 
validation is used. " +
    +                          "If the error rate on the validation input 
changes by less than the " +
    +                          "validationTol, then learning will stop early 
(before `maxIter`). " +
    +                          "This parameter is ignored when fit without 
validation is used.",
    +                          typeConverter=TypeConverters.toFloat)
    +
         @keyword_only
         def __init__(self, featuresCol="features", labelCol="label", 
predictionCol="prediction",
                      maxDepth=5, maxBins=32, minInstancesPerNode=1, 
minInfoGain=0.0,
                      maxMemoryInMB=256, cacheNodeIds=False, 
checkpointInterval=10, lossType="logistic",
                      maxIter=20, stepSize=0.1, seed=None, subsamplingRate=1.0,
    -                 featureSubsetStrategy="all"):
    +                 featureSubsetStrategy="all", validationTol=0.01):
    --- End diff --
    
    Shouldn't `validationIndicatorCol` be in `__init__` too, with a default of `None`?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to