GitHub user huaxingao commented on a diff in the pull request:
https://github.com/apache/spark/pull/20968#discussion_r179791957
--- Diff: python/pyspark/ml/feature.py ---
@@ -2342,8 +2342,38 @@ def mean(self):
return self._call_java("mean")
+class _StringIndexerParams(JavaParams, HasInputCol, HasOutputCol):
+ """
+ Params for :py:attr:`StringIndexer` and :py:attr:`StringIndexerModel`.
+ """
+
+ stringOrderType = Param(Params._dummy(), "stringOrderType",
+ "How to order labels of string column. The first label after " +
+ "ordering is assigned an index of 0. Supported options: " +
+ "frequencyDesc, frequencyAsc, alphabetDesc, alphabetAsc.",
+ typeConverter=TypeConverters.toString)
+
+ handleInvalid = Param(Params._dummy(), "handleInvalid", "how to handle invalid data (unseen " +
+ "or NULL values) in features and label column of string type. " +
+ "Options are 'skip' (filter out rows with invalid data), " +
+ "error (throw an error), or 'keep' (put invalid data " +
+ "in a special additional bucket, at index numLabels).",
+ typeConverter=TypeConverters.toString)
+
+ def __init__(self, *args):
+ super(_StringIndexerParams, self).__init__(*args)
+ self._setDefault(handleInvalid="error", stringOrderType="frequencyDesc")
+
+ @since("2.3.0")
+ def getStringOrderType(self):
+ """
+ Gets the value of :py:attr:`stringOrderType` or its default value 'frequencyDesc'.
+ """
+ return self.getOrDefault(self.stringOrderType)
+
+
@inherit_doc
-class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid, JavaMLReadable,
+class StringIndexer(JavaEstimator, _StringIndexerParams, HasHandleInvalid, JavaMLReadable,
--- End diff --
@BryanCutler Thanks a lot for your comments. I will change this.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]