Github user BryanCutler commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20968#discussion_r179623749
  
    --- Diff: python/pyspark/ml/feature.py ---
    @@ -2342,8 +2342,38 @@ def mean(self):
             return self._call_java("mean")
     
     
    +class _StringIndexerParams(JavaParams, HasInputCol, HasOutputCol):
    +    """
    +    Params for :py:attr:`StringIndexer` and :py:attr:`StringIndexerModel`.
    +    """
    +
    +    stringOrderType = Param(Params._dummy(), "stringOrderType",
    +                            "How to order labels of string column. The 
first label after " +
    +                            "ordering is assigned an index of 0. Supported 
options: " +
    +                            "frequencyDesc, frequencyAsc, alphabetDesc, 
alphabetAsc.",
    +                            typeConverter=TypeConverters.toString)
    +
    +    handleInvalid = Param(Params._dummy(), "handleInvalid", "how to handle 
invalid data (unseen " +
    +                          "or NULL values) in features and label column of 
string type. " +
    +                          "Options are 'skip' (filter out rows with 
invalid data), " +
    +                          "error (throw an error), or 'keep' (put invalid 
data " +
    +                          "in a special additional bucket, at index 
numLabels).",
    +                          typeConverter=TypeConverters.toString)
    +
    +    def __init__(self, *args):
    +        super(_StringIndexerParams, self).__init__(*args)
    +        self._setDefault(handleInvalid="error", 
stringOrderType="frequencyDesc")
    +
    +    @since("2.3.0")
    +    def getStringOrderType(self):
    +        """
    +        Gets the value of :py:attr:`stringOrderType` or its default value 
'frequencyDesc'.
    +        """
    +        return self.getOrDefault(self.stringOrderType)
    +
    +
     @inherit_doc
    -class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, 
HasHandleInvalid, JavaMLReadable,
    +class StringIndexer(JavaEstimator, _StringIndexerParams, HasHandleInvalid, 
JavaMLReadable,
    --- End diff --
    
    You should move `HasHandleInvalid` to be a mixin of `_StringIndexerParams` instead of listing it separately on `StringIndexer`.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to