Github user mengxr commented on a diff in the pull request:

    https://github.com/apache/spark/pull/2819#discussion_r19454181
  
    --- Diff: python/pyspark/mllib/feature.py ---
    @@ -18,59 +18,348 @@
     """
     Python package for feature in MLlib.
     """
    +import sys
    +import warnings
    +
    +import py4j.protocol
    +from py4j.protocol import Py4JJavaError
    +from py4j.java_gateway import JavaObject
    +
    +from pyspark import RDD, SparkContext
     from pyspark.serializers import PickleSerializer, AutoBatchedSerializer
    -from pyspark.mllib.linalg import _convert_to_vector, _to_java_object_rdd
    +from pyspark.mllib.linalg import Vectors, _to_java_object_rdd
    +
    +__all__ = ['Normalizer', 'StandardScalerModel', 'StandardScaler',
    +           'HashTF', 'IDFModel', 'IDF',
    +           'Word2Vec', 'Word2VecModel']
    +
    +
    +# Hack for support float('inf') in Py4j
    +old_smart_decode = py4j.protocol.smart_decode
    +
    +float_str_mapping = {
    +    u'nan': u'NaN',
    +    u'inf': u'Infinity',
    +    u'-inf': u'-Infinity',
    +}
    +
    +
    +def new_smart_decode(obj):
    +    if isinstance(obj, float):
    +        s = unicode(obj)
    +        return float_str_mapping.get(s, s)
    +    return old_smart_decode(obj)
    +
    +py4j.protocol.smart_decode = new_smart_decode
    +
    +
    +# TODO: move these helper functions into utils
    +_picklable_classes = [
    +    'LinkedList',
    +    'SparseVector',
    +    'DenseVector',
    +    'DenseMatrix',
    +    'Rating',
    +    'LabeledPoint',
    +]
    +
    +
    +def _py2java(sc, a):
    +    """ Convert Python object into Java """
    +    if isinstance(a, RDD):
    +        a = _to_java_object_rdd(a)
    +    elif not isinstance(a, (int, long, float, bool, basestring)):
    +        bytes = bytearray(PickleSerializer().dumps(a))
    +        a = sc._jvm.SerDe.loads(bytes)
    +    return a
    +
    +
    +def _java2py(sc, r):
    +    if isinstance(r, JavaObject):
    +        clsName = r.getClass().getSimpleName()
    +        if clsName in ("RDD", "JavaRDD"):
    +            if clsName == "RDD":
    +                r = r.toJavaRDD()
    +            jrdd = sc._jvm.SerDe.javaToPython(r)
    +            return RDD(jrdd, sc, AutoBatchedSerializer(PickleSerializer()))
     
    -__all__ = ['Word2Vec', 'Word2VecModel']
    +        elif clsName in _picklable_classes:
    +            r = sc._jvm.SerDe.dumps(r)
     
    +    if isinstance(r, bytearray):
    +        r = PickleSerializer().loads(str(r))
    +    return r
     
    -class Word2VecModel(object):
    +
    +def _callJavaFunc(sc, func, *args):
    +    """ Call Java Function
         """
    -    class for Word2Vec model
    +    args = [_py2java(sc, a) for a in args]
    +    return _java2py(sc, func(*args))
    +
    +
    +def _callAPI(sc, name, *args):
    +    """ Call API in PythonMLLibAPI
         """
    -    def __init__(self, sc, java_model):
    +    api = getattr(sc._jvm.PythonMLLibAPI(), name)
    +    return _callJavaFunc(sc, api, *args)
    +
    +
    +class VectorTransformer(object):
    +    """
    +    :: DeveloperApi ::
    +    Base class for transformation of a vector or RDD of vector
    +    """
    +    def transform(self, vector):
             """
    -        :param sc:  Spark context
    -        :param java_model:  Handle to Java model object
    +        Applies transformation on a vector.
    +
    +        :param vector: vector to be transformed.
             """
    +        raise NotImplementedError
    +
    +
    +class Normalizer(VectorTransformer):
    +    """
    +    :: Experimental ::
    +    Normalizes samples individually to unit L^p^ norm
    --- End diff --
    
    `L^p^` doesn't show up correctly in the generated doc. This is `L` with 
subscript `p`, so with Sphinx it should be
    
    ~~~
    L\ :sub:`p`\ norm
    ~~~


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to