[GitHub] spark pull request #16770: [SPARK-15009][PYTHON][ML] Construct a CountVector...

holdenk Fri, 16 Mar 2018 11:38:41 -0700

Github user holdenk commented on a diff in the pull request:

    https://github.com/apache/spark/pull/16770#discussion_r175177201
  
    --- Diff: python/pyspark/ml/feature.py ---
    @@ -497,66 +544,61 @@ def setMinTF(self, value):
             """
             return self._set(minTF=value)
     
    -    @since("1.6.0")
    -    def getMinTF(self):
    -        """
    -        Gets the value of minTF or its default value.
    -        """
    -        return self.getOrDefault(self.minTF)
    -
         @since("1.6.0")
         def setMinDF(self, value):
             """
             Sets the value of :py:attr:`minDF`.
             """
             return self._set(minDF=value)
     
    -    @since("1.6.0")
    -    def getMinDF(self):
    -        """
    -        Gets the value of minDF or its default value.
    -        """
    -        return self.getOrDefault(self.minDF)
    -
         @since("1.6.0")
         def setVocabSize(self, value):
             """
             Sets the value of :py:attr:`vocabSize`.
             """
             return self._set(vocabSize=value)
     
    -    @since("1.6.0")
    -    def getVocabSize(self):
    -        """
    -        Gets the value of vocabSize or its default value.
    -        """
    -        return self.getOrDefault(self.vocabSize)
    -
         @since("2.0.0")
         def setBinary(self, value):
             """
             Sets the value of :py:attr:`binary`.
             """
             return self._set(binary=value)
     
    -    @since("2.0.0")
    -    def getBinary(self):
    -        """
    -        Gets the value of binary or its default value.
    -        """
    -        return self.getOrDefault(self.binary)
    -
         def _create_model(self, java_model):
             return CountVectorizerModel(java_model)
     
     
    -class CountVectorizerModel(JavaModel, JavaMLReadable, JavaMLWritable):
    +@inherit_doc
    +class CountVectorizerModel(JavaModel, _CountVectorizerParams, JavaMLReadable, JavaMLWritable):
         """
         Model fitted by :py:class:`CountVectorizer`.
     
         .. versionadded:: 1.6.0
         """
     
    +    @classmethod
    +    @since("2.4.0")
    +    def from_vocabulary(cls, vocabulary, inputCol, outputCol=None, minTF=None, binary=None):
    +        """
    +        Construct the model directly from a vocabulary list of strings,
    +        requires an active SparkContext.
    +        """
    +        sc = SparkContext._active_spark_context
    +        java_class = sc._gateway.jvm.java.lang.String
    +        jvocab = CountVectorizerModel._new_java_array(vocabulary, java_class)
    +        model = CountVectorizerModel._create_from_java_class(
    +            "org.apache.spark.ml.feature.CountVectorizerModel", jvocab)
    +        model.setInputCol(inputCol)
    +        if outputCol is not None:
    +            model.setOutputCol(outputCol)
    +        if minTF is not None:
    +            model.setMinTF(minTF)
    +        if binary is not None:
    +            model.setBinary(binary)
    +        model._set(vocabSize=len(vocabulary))
    --- End diff --
    
    SGTM (sounds good to me).



---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] spark pull request #16770: [SPARK-15009][PYTHON][ML] Construct a CountVector...

Reply via email to