Github user marmbrus commented on a diff in the pull request:
https://github.com/apache/spark/pull/1346#discussion_r15481537
--- Diff: python/pyspark/sql.py ---
@@ -20,8 +20,413 @@
from py4j.protocol import Py4JError
-__all__ = ["SQLContext", "HiveContext", "LocalHiveContext",
"TestHiveContext", "SchemaRDD", "Row"]
+__all__ = [
+    "StringType", "BinaryType", "BooleanType", "TimestampType", "DecimalType",
+    "DoubleType", "FloatType", "ByteType", "IntegerType", "LongType",
+    "ShortType", "ArrayType", "MapType", "StructField", "StructType",
+    "SQLContext", "HiveContext", "LocalHiveContext", "TestHiveContext",
+    "SchemaRDD", "Row"]
+class PrimitiveTypeSingleton(type):
+    _instances = {}
+
+    def __call__(cls):
+        if cls not in cls._instances:
+            cls._instances[cls] = super(PrimitiveTypeSingleton, cls).__call__()
+        return cls._instances[cls]
+
+class StringType(object):
+    """Spark SQL StringType
+
+    The data type representing string values.
+
+    """
+    __metaclass__ = PrimitiveTypeSingleton
+
+    def _get_scala_type_string(self):
+        return "StringType"
+
+class BinaryType(object):
+    """Spark SQL BinaryType
+
+    The data type representing bytes values and bytearray values.
+
+    """
+    __metaclass__ = PrimitiveTypeSingleton
+
+    def _get_scala_type_string(self):
+        return "BinaryType"
+
+class BooleanType(object):
+    """Spark SQL BooleanType
+
+    The data type representing bool values.
+
+    """
+    __metaclass__ = PrimitiveTypeSingleton
+
+    def _get_scala_type_string(self):
+        return "BooleanType"
+
+class TimestampType(object):
+    """Spark SQL TimestampType
+
+    The data type representing datetime.datetime values.
+
+    """
+    __metaclass__ = PrimitiveTypeSingleton
+
+    def _get_scala_type_string(self):
+        return "TimestampType"
+
+class DecimalType(object):
+    """Spark SQL DecimalType
+
+    The data type representing decimal.Decimal values.
+
+    """
+    __metaclass__ = PrimitiveTypeSingleton
+
+    def _get_scala_type_string(self):
+        return "DecimalType"
+
+class DoubleType(object):
+    """Spark SQL DoubleType
+
+    The data type representing float values. Because a float value in Python
+    is a double precision value, DoubleType should be used for float values.
+
+    """
+    __metaclass__ = PrimitiveTypeSingleton
+
+    def _get_scala_type_string(self):
+        return "DoubleType"
+
+class FloatType(object):
+    """Spark SQL FloatType
+
+    For PySpark, please use L{DoubleType} instead of using L{FloatType}.
+
+    """
+    __metaclass__ = PrimitiveTypeSingleton
+
+    def _get_scala_type_string(self):
+        return "FloatType"
+
+class ByteType(object):
+    """Spark SQL ByteType
+
+    For PySpark, please use L{IntegerType} instead of using L{ByteType}.
+
+    """
+    __metaclass__ = PrimitiveTypeSingleton
+
+    def _get_scala_type_string(self):
+        return "ByteType"
+
+class IntegerType(object):
+    """Spark SQL IntegerType
+
+    The data type representing int values.
+
+    """
+    __metaclass__ = PrimitiveTypeSingleton
+
+    def _get_scala_type_string(self):
+        return "IntegerType"
+
+class LongType(object):
+    """Spark SQL LongType
+
+    The data type representing long values. If any value is beyond the range
+    of [-9223372036854775808, 9223372036854775807], please use DecimalType.
+
+    """
+    __metaclass__ = PrimitiveTypeSingleton
+
+    def _get_scala_type_string(self):
+        return "LongType"
+
+class ShortType(object):
+    """Spark SQL ShortType
+
+    For PySpark, please use L{IntegerType} instead of using L{ShortType}.
+
+    """
+    __metaclass__ = PrimitiveTypeSingleton
+
+    def _get_scala_type_string(self):
+        return "ShortType"
+
+class ArrayType(object):
+    """Spark SQL ArrayType
+
+    The data type representing list values.
+
+    """
+    def __init__(self, elementType, containsNull):
+        """Creates an ArrayType
+
+        :param elementType: the data type of elements.
+        :param containsNull: indicates whether the list contains null values.
+
+        >>> ArrayType(StringType(), True) == ArrayType(StringType(), False)
+        False
+        >>> ArrayType(StringType(), True) == ArrayType(StringType(), True)
+        True
+        """
+        self.elementType = elementType
+        self.containsNull = containsNull
+
+    def _get_scala_type_string(self):
+        return "ArrayType(" + self.elementType._get_scala_type_string() + \
+            "," + str(self.containsNull).lower() + ")"
+
+    def __eq__(self, other):
+        return (isinstance(other, self.__class__) and
+                self.elementType == other.elementType and
+                self.containsNull == other.containsNull)
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
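+
+# For reference, ArrayType(StringType(), True)._get_scala_type_string()
+# returns "ArrayType(StringType,true)", matching the Scala-side type name.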
+
+
+class MapType(object):
+    """Spark SQL MapType
+
+    The data type representing dict values.
+
+    """
+    def __init__(self, keyType, valueType):
--- End diff ---
I thought we decided in the meeting that we need to have a null bit for the
key and value, since Hive does.
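
For illustration, a minimal sketch of what that could look like; the
keyContainsNull / valueContainsNull flag names here are hypothetical, not
part of this patch:

    class MapType(object):
        """Spark SQL MapType

        The data type representing dict values, with null bits for keys
        and values.
        """
        def __init__(self, keyType, valueType,
                     keyContainsNull=False, valueContainsNull=True):
            # keyContainsNull/valueContainsNull mirror ArrayType's
            # containsNull flag (hypothetical names, for illustration).
            self.keyType = keyType
            self.valueType = valueType
            self.keyContainsNull = keyContainsNull
            self.valueContainsNull = valueContainsNull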
---