pitrou commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r447751371



##########
File path: python/pyarrow/scalar.pxi
##########
@@ -16,1198 +16,745 @@
 # under the License.
 
 
-_NULL = NA = None
+import collections
 
 
 cdef class Scalar:
     """
-    The base class for all array elements.
+    The base class for scalars.
     """
 
+    def __init__(self):
+        raise TypeError("Do not call {}'s constructor directly, use "
+                        "pa.scalar() instead.".format(self.__class__.__name__))
 
-cdef class NullType(Scalar):
-    """
-    Singleton for null array elements.
-    """
-    # TODO rename this NullValue?
+    cdef void init(self, const shared_ptr[CScalar]& wrapped):
+        self.wrapped = wrapped
 
-    def __cinit__(self):
-        global NA
-        if NA is not None:
-            raise Exception('Cannot create multiple NAType instances')
+    @staticmethod
+    cdef wrap(const shared_ptr[CScalar]& wrapped):
+        cdef:
+            Scalar self
+            Type type_id = wrapped.get().type.get().id()
+
+        if type_id == _Type_NA:
+            return _NULL
+
+        typ = _scalar_classes[type_id]
+        self = typ.__new__(typ)
+        self.init(wrapped)
+
+        return self
+
+    cdef inline shared_ptr[CScalar] unwrap(self) nogil:
+        return self.wrapped
+
+    @property
+    def type(self):
+        return pyarrow_wrap_data_type(self.wrapped.get().type)
 
-        self.type = null()
+    @property
+    def is_valid(self):
+        return self.wrapped.get().is_valid
 
     def __repr__(self):
-        return 'NULL'
+        return '<pyarrow.{}: {!r}>'.format(
+            self.__class__.__name__, self.as_py()
+        )
 
-    def as_py(self):
-        """
-        Return None
-        """
-        return None
+    def __str__(self):
+        return str(self.as_py())
+
+    def equals(self, Scalar other):
+        return self.wrapped.get().Equals(other.unwrap().get()[0])
 
     def __eq__(self, other):
-        return NA
+        try:
+            if not isinstance(other, Scalar):
+                other = scalar(other, type=self.type)
+            return self.equals(other)
+        except (TypeError, ValueError, ArrowInvalid):
+            return NotImplemented
+
+    def __hash__(self):
+        cdef CScalarHash hasher
+        return hasher(self.wrapped)
+
+    def as_py(self):
+        raise NotImplementedError()
 
 
-_NULL = NA = NullType()
+_NULL = NA = None
 
 
-cdef class ArrayValue(Scalar):
+cdef class NullScalar(Scalar):
     """
-    The base class for non-null array elements.
+    Concrete class for null scalars.
     """
 
-    def __init__(self):
-        raise TypeError("Do not call {}'s constructor directly, use array "
-                        "subscription instead."
-                        .format(self.__class__.__name__))
+    def __cinit__(self):
+        global NA
+        if NA is not None:
+            raise Exception('Cannot create multiple NAType instances')
+        self.init(shared_ptr[CScalar](new CNullScalar()))
 
-    cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-                   int64_t index):
-        self.type = type
-        self.index = index
-        self._set_array(sp_array)
+    def __init__(self):
+        pass
 
-    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-        self.sp_array = sp_array
+    def __eq__(self, other):
+        return NA
 
-    def __repr__(self):
-        if hasattr(self, 'as_py'):
-            return repr(self.as_py())
-        else:
-            return super(Scalar, self).__repr__()
+    def __hash__(self):
+        cdef CScalarHash hasher
+        return hasher(self.wrapped)
 
-    def __str__(self):
-        if hasattr(self, 'as_py'):
-            return str(self.as_py())
-        else:
-            return super(Scalar, self).__str__()
+    def as_py(self):
+        """
+        Return this value as a Python None.
+        """
+        return None
 
-    def __eq__(self, other):
-        if hasattr(self, 'as_py'):
-            if isinstance(other, ArrayValue):
-                other = other.as_py()
-            return self.as_py() == other
-        else:
-            raise NotImplementedError(
-                "Cannot compare Arrow values that don't support as_py()")
 
-    def __hash__(self):
-        return hash(self.as_py())
+_NULL = NA = NullScalar()
 
 
-cdef class BooleanValue(ArrayValue):
+cdef class BooleanScalar(Scalar):
     """
-    Concrete class for boolean array elements.
+    Concrete class for boolean scalars.
     """
 
     def as_py(self):
         """
         Return this value as a Python bool.
         """
-        cdef CBooleanArray* ap = <CBooleanArray*> self.sp_array.get()
-        return ap.Value(self.index)
+        cdef CBooleanScalar* sp = <CBooleanScalar*> self.wrapped.get()
+        return sp.value if sp.is_valid else None
 
 
-cdef class Int8Value(ArrayValue):
+cdef class UInt8Scalar(Scalar):
     """
-    Concrete class for int8 array elements.
+    Concrete class for uint8 scalars.
     """
 
     def as_py(self):
         """
         Return this value as a Python int.
         """
-        cdef CInt8Array* ap = <CInt8Array*> self.sp_array.get()
-        return ap.Value(self.index)
+        cdef CUInt8Scalar* sp = <CUInt8Scalar*> self.wrapped.get()
+        return sp.value if sp.is_valid else None
 
 
-cdef class UInt8Value(ArrayValue):
+cdef class Int8Scalar(Scalar):
     """
-    Concrete class for uint8 array elements.
+    Concrete class for int8 scalars.
     """
 
     def as_py(self):
         """
         Return this value as a Python int.
         """
-        cdef CUInt8Array* ap = <CUInt8Array*> self.sp_array.get()
-        return ap.Value(self.index)
+        cdef CInt8Scalar* sp = <CInt8Scalar*> self.wrapped.get()
+        return sp.value if sp.is_valid else None
 
 
-cdef class Int16Value(ArrayValue):
+cdef class UInt16Scalar(Scalar):
     """
-    Concrete class for int16 array elements.
+    Concrete class for uint16 scalars.
     """
 
     def as_py(self):
         """
         Return this value as a Python int.
         """
-        cdef CInt16Array* ap = <CInt16Array*> self.sp_array.get()
-        return ap.Value(self.index)
+        cdef CUInt16Scalar* sp = <CUInt16Scalar*> self.wrapped.get()
+        return sp.value if sp.is_valid else None
 
 
-cdef class UInt16Value(ArrayValue):
+cdef class Int16Scalar(Scalar):
     """
-    Concrete class for uint16 array elements.
+    Concrete class for int16 scalars.
     """
 
     def as_py(self):
         """
         Return this value as a Python int.
         """
-        cdef CUInt16Array* ap = <CUInt16Array*> self.sp_array.get()
-        return ap.Value(self.index)
+        cdef CInt16Scalar* sp = <CInt16Scalar*> self.wrapped.get()
+        return sp.value if sp.is_valid else None
 
 
-cdef class Int32Value(ArrayValue):
+cdef class UInt32Scalar(Scalar):
     """
-    Concrete class for int32 array elements.
+    Concrete class for uint32 scalars.
     """
 
     def as_py(self):
         """
         Return this value as a Python int.
         """
-        cdef CInt32Array* ap = <CInt32Array*> self.sp_array.get()
-        return ap.Value(self.index)
+        cdef CUInt32Scalar* sp = <CUInt32Scalar*> self.wrapped.get()
+        return sp.value if sp.is_valid else None
 
 
-cdef class UInt32Value(ArrayValue):
+cdef class Int32Scalar(Scalar):
     """
-    Concrete class for uint32 array elements.
+    Concrete class for int32 scalars.
     """
 
     def as_py(self):
         """
         Return this value as a Python int.
         """
-        cdef CUInt32Array* ap = <CUInt32Array*> self.sp_array.get()
-        return ap.Value(self.index)
+        cdef CInt32Scalar* sp = <CInt32Scalar*> self.wrapped.get()
+        return sp.value if sp.is_valid else None
 
 
-cdef class Int64Value(ArrayValue):
+cdef class UInt64Scalar(Scalar):
     """
-    Concrete class for int64 array elements.
+    Concrete class for uint64 scalars.
     """
 
     def as_py(self):
         """
         Return this value as a Python int.
         """
-        cdef CInt64Array* ap = <CInt64Array*> self.sp_array.get()
-        return ap.Value(self.index)
+        cdef CUInt64Scalar* sp = <CUInt64Scalar*> self.wrapped.get()
+        return sp.value if sp.is_valid else None
 
 
-cdef class UInt64Value(ArrayValue):
+cdef class Int64Scalar(Scalar):
     """
-    Concrete class for uint64 array elements.
+    Concrete class for int64 scalars.
     """
 
     def as_py(self):
         """
         Return this value as a Python int.
         """
-        cdef CUInt64Array* ap = <CUInt64Array*> self.sp_array.get()
-        return ap.Value(self.index)
+        cdef CInt64Scalar* sp = <CInt64Scalar*> self.wrapped.get()
+        return sp.value if sp.is_valid else None
 
 
-cdef class Date32Value(ArrayValue):
+cdef class HalfFloatScalar(Scalar):
     """
-    Concrete class for date32 array elements.
+    Concrete class for float scalars.
     """
 
+    def __hash__(self):
+        cdef CScalarHash hasher
+        return hasher(self.wrapped)
+
+    def __eq__(self, other):
+        if hasattr(self, 'as_py'):
+            if isinstance(other, Scalar):
+                other = other.as_py()
+            return self.as_py() == other
+        else:
+            raise NotImplementedError
+
     def as_py(self):
         """
-        Return this value as a Python datetime.datetime instance.
+        Return this value as a Python float.
         """
-        cdef CDate32Array* ap = <CDate32Array*> self.sp_array.get()
+        cdef CHalfFloatScalar* sp = <CHalfFloatScalar*> self.wrapped.get()
+        return PyHalf_FromHalf(sp.value) if sp.is_valid else None
 
-        # Shift to seconds since epoch
-        return (datetime.date(1970, 1, 1) +
-                datetime.timedelta(days=ap.Value(self.index)))
+
+cdef class FloatScalar(Scalar):
+    """
+    Concrete class for float scalars.
+    """
+
+    def as_py(self):
+        """
+        Return this value as a Python float.
+        """
+        cdef CFloatScalar* sp = <CFloatScalar*> self.wrapped.get()
+        return sp.value if sp.is_valid else None
 
 
-cdef class Date64Value(ArrayValue):
+cdef class DoubleScalar(Scalar):
     """
-    Concrete class for date64 array elements.
+    Concrete class for double scalars.
     """
 
     def as_py(self):
         """
-        Return this value as a Python datetime.datetime instance.
+        Return this value as a Python float.
         """
-        cdef CDate64Array* ap = <CDate64Array*> self.sp_array.get()
-        return (datetime.date(1970, 1, 1) +
-                datetime.timedelta(
-                    days=ap.Value(self.index) / 86400000))
+        cdef CDoubleScalar* sp = <CDoubleScalar*> self.wrapped.get()
+        return sp.value if sp.is_valid else None
 
 
-cdef class Time32Value(ArrayValue):
+cdef class Decimal128Scalar(Scalar):
     """
-    Concrete class for time32 array elements.
+    Concrete class for decimal128 scalars.
     """
 
     def as_py(self):
         """
-        Return this value as a Python datetime.timedelta instance.
+        Return this value as a Python Decimal.
         """
         cdef:
-            CTime32Array* ap = <CTime32Array*> self.sp_array.get()
-            CTime32Type* dtype = <CTime32Type*> ap.type().get()
-
-        if dtype.unit() == TimeUnit_SECOND:
-            delta = datetime.timedelta(seconds=ap.Value(self.index))
-            return (datetime.datetime(1970, 1, 1) + delta).time()
+            CDecimal128Scalar* sp = <CDecimal128Scalar*> self.wrapped.get()
+            CDecimal128Type* dtype = <CDecimal128Type*> sp.type.get()
+        if sp.is_valid:
+            return _pydecimal.Decimal(

Review comment:
       Wow, this is misleading, because `_pydecimal` is the name of the pure
Python implementation of `decimal`:
   ```python
   >>> import _pydecimal
   >>> _pydecimal
   <module '_pydecimal' from '/home/antoine/miniconda3/envs/pyarrow/lib/python3.7/_pydecimal.py'>
   >>> import decimal
   >>> decimal is _pydecimal
   False
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to