GitHub user holdenk commented on a diff in the pull request:
https://github.com/apache/spark/pull/16770#discussion_r175177201
--- Diff: python/pyspark/ml/feature.py ---
@@ -497,66 +544,61 @@ def setMinTF(self, value):
"""
return self._set(minTF=value)
- @since("1.6.0")
- def getMinTF(self):
- """
- Gets the value of minTF or its default value.
- """
- return self.getOrDefault(self.minTF)
-
@since("1.6.0")
def setMinDF(self, value):
"""
Sets the value of :py:attr:`minDF`.
"""
return self._set(minDF=value)
- @since("1.6.0")
- def getMinDF(self):
- """
- Gets the value of minDF or its default value.
- """
- return self.getOrDefault(self.minDF)
-
@since("1.6.0")
def setVocabSize(self, value):
"""
Sets the value of :py:attr:`vocabSize`.
"""
return self._set(vocabSize=value)
- @since("1.6.0")
- def getVocabSize(self):
- """
- Gets the value of vocabSize or its default value.
- """
- return self.getOrDefault(self.vocabSize)
-
@since("2.0.0")
def setBinary(self, value):
"""
Sets the value of :py:attr:`binary`.
"""
return self._set(binary=value)
- @since("2.0.0")
- def getBinary(self):
- """
- Gets the value of binary or its default value.
- """
- return self.getOrDefault(self.binary)
-
def _create_model(self, java_model):
return CountVectorizerModel(java_model)
-class CountVectorizerModel(JavaModel, JavaMLReadable, JavaMLWritable):
+@inherit_doc
+class CountVectorizerModel(JavaModel, _CountVectorizerParams,
JavaMLReadable, JavaMLWritable):
"""
Model fitted by :py:class:`CountVectorizer`.
.. versionadded:: 1.6.0
"""
+ @classmethod
+ @since("2.4.0")
+ def from_vocabulary(cls, vocabulary, inputCol, outputCol=None,
minTF=None, binary=None):
+ """
+ Construct the model directly from a vocabulary list of strings,
+ requires an active SparkContext.
+ """
+ sc = SparkContext._active_spark_context
+ java_class = sc._gateway.jvm.java.lang.String
+ jvocab = CountVectorizerModel._new_java_array(vocabulary,
java_class)
+ model = CountVectorizerModel._create_from_java_class(
+ "org.apache.spark.ml.feature.CountVectorizerModel", jvocab)
+ model.setInputCol(inputCol)
+ if outputCol is not None:
+ model.setOutputCol(outputCol)
+ if minTF is not None:
+ model.setMinTF(minTF)
+ if binary is not None:
+ model.setBinary(binary)
+ model._set(vocabSize=len(vocabulary))
--- End diff --
SGTM (sounds good to me).
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]