[GitHub] [arrow] pitrou commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-06 Thread GitBox


pitrou commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r450371161



##
File path: cpp/src/arrow/scalar_test.cc
##
@@ -451,6 +565,127 @@ TEST(TestStructScalar, FieldAccess) {
 
   ASSERT_RAISES(Invalid, abc.field(5).status());
   ASSERT_RAISES(Invalid, abc.field("c").status());
+
+  ASSERT_OK_AND_ASSIGN(auto d, abc.field("d"));
+  ASSERT_TRUE(d->Equals(MakeNullScalar(int64(;
+}
+
+TEST(TestDictionaryScalar, Basics) {
+  auto ty = dictionary(int8(), utf8());
+  auto dict = ArrayFromJSON(utf8(), "[\"alpha\", \"beta\", \"gamma\"]");
+
+  DictionaryScalar::ValueType alpha;
+  ASSERT_OK_AND_ASSIGN(alpha.index, MakeScalar(int8(), 0));
+  alpha.dictionary = dict;
+
+  DictionaryScalar::ValueType gamma;
+  ASSERT_OK_AND_ASSIGN(gamma.index, MakeScalar(int8(), 2));
+  gamma.dictionary = dict;
+
+  auto scalar_null = MakeNullScalar(ty);
+  auto scalar_alpha = DictionaryScalar(alpha, ty);
+  auto scalar_gamma = DictionaryScalar(gamma, ty);
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_null,
+  checked_cast(*scalar_null).GetEncodedValue());
+  ASSERT_TRUE(encoded_null->Equals(MakeNullScalar(utf8(;
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_alpha,
+  checked_cast(scalar_alpha).GetEncodedValue());
+  ASSERT_TRUE(encoded_alpha->Equals(MakeScalar("alpha")));
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_gamma,
+  checked_cast(scalar_gamma).GetEncodedValue());
+  ASSERT_TRUE(encoded_gamma->Equals(MakeScalar("gamma")));
+
+  // test Array.GetScalar
+  DictionaryArray arr(ty, ArrayFromJSON(int8(), "[2, 0, 1, null]"), dict);
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_OK_AND_ASSIGN(auto last, arr.GetScalar(3));
+
+  ASSERT_TRUE(first->Equals(scalar_gamma));
+  ASSERT_TRUE(second->Equals(scalar_alpha));
+  ASSERT_TRUE(last->Equals(scalar_null));
+}
+
+TEST(TestSparseUnionScalar, Basics) {
+  auto ty = sparse_union({field("string", utf8()), field("number", uint64())});
+
+  auto alpha = MakeScalar("alpha");
+  auto beta = MakeScalar("beta");
+  ASSERT_OK_AND_ASSIGN(auto two, MakeScalar(uint64(), 2));
+
+  auto scalar_alpha = SparseUnionScalar(alpha, ty);
+  auto scalar_beta = SparseUnionScalar(beta, ty);
+  auto scalar_two = SparseUnionScalar(two, ty);
+
+  // test Array.GetScalar
+  std::vector> children{
+  ArrayFromJSON(utf8(), "[\"alpha\", \"\", \"beta\", null, \"gamma\"]"),
+  ArrayFromJSON(uint64(), "[1, 2, 11, 22, null]")};
+
+  auto type_ids = ArrayFromJSON(int8(), "[0, 1, 0, 0, 1]");
+  SparseUnionArray arr(ty, 5, children, type_ids->data()->buffers[1]);
+  ASSERT_OK(arr.ValidateFull());
+
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_TRUE(first->Equals(scalar_alpha));
+
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_TRUE(second->Equals(scalar_two));
+
+  ASSERT_OK_AND_ASSIGN(auto third, arr.GetScalar(2));
+  ASSERT_TRUE(third->Equals(scalar_beta));
+
+  ASSERT_OK_AND_ASSIGN(auto fourth, arr.GetScalar(3));
+  ASSERT_TRUE(fourth->Equals(MakeNullScalar(ty)));

Review comment:
   It the value scalar is not a nullptr, then it would seem better, unless 
that's complicated.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] pitrou commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-06 Thread GitBox


pitrou commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r450328412



##
File path: cpp/src/arrow/scalar_test.cc
##
@@ -451,6 +565,127 @@ TEST(TestStructScalar, FieldAccess) {
 
   ASSERT_RAISES(Invalid, abc.field(5).status());
   ASSERT_RAISES(Invalid, abc.field("c").status());
+
+  ASSERT_OK_AND_ASSIGN(auto d, abc.field("d"));
+  ASSERT_TRUE(d->Equals(MakeNullScalar(int64(;
+}
+
+TEST(TestDictionaryScalar, Basics) {
+  auto ty = dictionary(int8(), utf8());
+  auto dict = ArrayFromJSON(utf8(), "[\"alpha\", \"beta\", \"gamma\"]");
+
+  DictionaryScalar::ValueType alpha;
+  ASSERT_OK_AND_ASSIGN(alpha.index, MakeScalar(int8(), 0));
+  alpha.dictionary = dict;
+
+  DictionaryScalar::ValueType gamma;
+  ASSERT_OK_AND_ASSIGN(gamma.index, MakeScalar(int8(), 2));
+  gamma.dictionary = dict;
+
+  auto scalar_null = MakeNullScalar(ty);
+  auto scalar_alpha = DictionaryScalar(alpha, ty);
+  auto scalar_gamma = DictionaryScalar(gamma, ty);
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_null,
+  checked_cast(*scalar_null).GetEncodedValue());
+  ASSERT_TRUE(encoded_null->Equals(MakeNullScalar(utf8(;
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_alpha,
+  checked_cast(scalar_alpha).GetEncodedValue());
+  ASSERT_TRUE(encoded_alpha->Equals(MakeScalar("alpha")));
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_gamma,
+  checked_cast(scalar_gamma).GetEncodedValue());
+  ASSERT_TRUE(encoded_gamma->Equals(MakeScalar("gamma")));
+
+  // test Array.GetScalar
+  DictionaryArray arr(ty, ArrayFromJSON(int8(), "[2, 0, 1, null]"), dict);
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_OK_AND_ASSIGN(auto last, arr.GetScalar(3));
+
+  ASSERT_TRUE(first->Equals(scalar_gamma));
+  ASSERT_TRUE(second->Equals(scalar_alpha));
+  ASSERT_TRUE(last->Equals(scalar_null));
+}
+
+TEST(TestSparseUnionScalar, Basics) {
+  auto ty = sparse_union({field("string", utf8()), field("number", uint64())});
+
+  auto alpha = MakeScalar("alpha");
+  auto beta = MakeScalar("beta");
+  ASSERT_OK_AND_ASSIGN(auto two, MakeScalar(uint64(), 2));
+
+  auto scalar_alpha = SparseUnionScalar(alpha, ty);
+  auto scalar_beta = SparseUnionScalar(beta, ty);
+  auto scalar_two = SparseUnionScalar(two, ty);
+
+  // test Array.GetScalar
+  std::vector> children{
+  ArrayFromJSON(utf8(), "[\"alpha\", \"\", \"beta\", null, \"gamma\"]"),
+  ArrayFromJSON(uint64(), "[1, 2, 11, 22, null]")};
+
+  auto type_ids = ArrayFromJSON(int8(), "[0, 1, 0, 0, 1]");
+  SparseUnionArray arr(ty, 5, children, type_ids->data()->buffers[1]);
+  ASSERT_OK(arr.ValidateFull());
+
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_TRUE(first->Equals(scalar_alpha));
+
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_TRUE(second->Equals(scalar_two));
+
+  ASSERT_OK_AND_ASSIGN(auto third, arr.GetScalar(2));
+  ASSERT_TRUE(third->Equals(scalar_beta));
+
+  ASSERT_OK_AND_ASSIGN(auto fourth, arr.GetScalar(3));
+  ASSERT_TRUE(fourth->Equals(MakeNullScalar(ty)));

Review comment:
   Ah, perhaps check that in the test?





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] pitrou commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-06 Thread GitBox


pitrou commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r450319790



##
File path: cpp/src/arrow/scalar_test.cc
##
@@ -451,6 +565,127 @@ TEST(TestStructScalar, FieldAccess) {
 
   ASSERT_RAISES(Invalid, abc.field(5).status());
   ASSERT_RAISES(Invalid, abc.field("c").status());
+
+  ASSERT_OK_AND_ASSIGN(auto d, abc.field("d"));
+  ASSERT_TRUE(d->Equals(MakeNullScalar(int64(;
+}
+
+TEST(TestDictionaryScalar, Basics) {
+  auto ty = dictionary(int8(), utf8());
+  auto dict = ArrayFromJSON(utf8(), "[\"alpha\", \"beta\", \"gamma\"]");
+
+  DictionaryScalar::ValueType alpha;
+  ASSERT_OK_AND_ASSIGN(alpha.index, MakeScalar(int8(), 0));
+  alpha.dictionary = dict;
+
+  DictionaryScalar::ValueType gamma;
+  ASSERT_OK_AND_ASSIGN(gamma.index, MakeScalar(int8(), 2));
+  gamma.dictionary = dict;
+
+  auto scalar_null = MakeNullScalar(ty);
+  auto scalar_alpha = DictionaryScalar(alpha, ty);
+  auto scalar_gamma = DictionaryScalar(gamma, ty);
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_null,
+  checked_cast(*scalar_null).GetEncodedValue());
+  ASSERT_TRUE(encoded_null->Equals(MakeNullScalar(utf8(;
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_alpha,
+  checked_cast(scalar_alpha).GetEncodedValue());
+  ASSERT_TRUE(encoded_alpha->Equals(MakeScalar("alpha")));
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_gamma,
+  checked_cast(scalar_gamma).GetEncodedValue());
+  ASSERT_TRUE(encoded_gamma->Equals(MakeScalar("gamma")));
+
+  // test Array.GetScalar
+  DictionaryArray arr(ty, ArrayFromJSON(int8(), "[2, 0, 1, null]"), dict);
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_OK_AND_ASSIGN(auto last, arr.GetScalar(3));
+
+  ASSERT_TRUE(first->Equals(scalar_gamma));
+  ASSERT_TRUE(second->Equals(scalar_alpha));
+  ASSERT_TRUE(last->Equals(scalar_null));
+}
+
+TEST(TestSparseUnionScalar, Basics) {
+  auto ty = sparse_union({field("string", utf8()), field("number", uint64())});
+
+  auto alpha = MakeScalar("alpha");
+  auto beta = MakeScalar("beta");
+  ASSERT_OK_AND_ASSIGN(auto two, MakeScalar(uint64(), 2));
+
+  auto scalar_alpha = SparseUnionScalar(alpha, ty);
+  auto scalar_beta = SparseUnionScalar(beta, ty);
+  auto scalar_two = SparseUnionScalar(two, ty);
+
+  // test Array.GetScalar
+  std::vector> children{
+  ArrayFromJSON(utf8(), "[\"alpha\", \"\", \"beta\", null, \"gamma\"]"),
+  ArrayFromJSON(uint64(), "[1, 2, 11, 22, null]")};
+
+  auto type_ids = ArrayFromJSON(int8(), "[0, 1, 0, 0, 1]");
+  SparseUnionArray arr(ty, 5, children, type_ids->data()->buffers[1]);
+  ASSERT_OK(arr.ValidateFull());
+
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_TRUE(first->Equals(scalar_alpha));
+
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_TRUE(second->Equals(scalar_two));
+
+  ASSERT_OK_AND_ASSIGN(auto third, arr.GetScalar(2));
+  ASSERT_TRUE(third->Equals(scalar_beta));
+
+  ASSERT_OK_AND_ASSIGN(auto fourth, arr.GetScalar(3));
+  ASSERT_TRUE(fourth->Equals(MakeNullScalar(ty)));

Review comment:
   Ah, I see. Yes, you're probably right. A pity we lose information about 
the value type, then.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] pitrou commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-06 Thread GitBox


pitrou commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r450314430



##
File path: cpp/src/arrow/scalar_test.cc
##
@@ -451,6 +565,127 @@ TEST(TestStructScalar, FieldAccess) {
 
   ASSERT_RAISES(Invalid, abc.field(5).status());
   ASSERT_RAISES(Invalid, abc.field("c").status());
+
+  ASSERT_OK_AND_ASSIGN(auto d, abc.field("d"));
+  ASSERT_TRUE(d->Equals(MakeNullScalar(int64(;
+}
+
+TEST(TestDictionaryScalar, Basics) {
+  auto ty = dictionary(int8(), utf8());
+  auto dict = ArrayFromJSON(utf8(), "[\"alpha\", \"beta\", \"gamma\"]");
+
+  DictionaryScalar::ValueType alpha;
+  ASSERT_OK_AND_ASSIGN(alpha.index, MakeScalar(int8(), 0));
+  alpha.dictionary = dict;
+
+  DictionaryScalar::ValueType gamma;
+  ASSERT_OK_AND_ASSIGN(gamma.index, MakeScalar(int8(), 2));
+  gamma.dictionary = dict;
+
+  auto scalar_null = MakeNullScalar(ty);
+  auto scalar_alpha = DictionaryScalar(alpha, ty);
+  auto scalar_gamma = DictionaryScalar(gamma, ty);
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_null,
+  checked_cast(*scalar_null).GetEncodedValue());
+  ASSERT_TRUE(encoded_null->Equals(MakeNullScalar(utf8(;
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_alpha,
+  checked_cast(scalar_alpha).GetEncodedValue());
+  ASSERT_TRUE(encoded_alpha->Equals(MakeScalar("alpha")));
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_gamma,
+  checked_cast(scalar_gamma).GetEncodedValue());
+  ASSERT_TRUE(encoded_gamma->Equals(MakeScalar("gamma")));
+
+  // test Array.GetScalar
+  DictionaryArray arr(ty, ArrayFromJSON(int8(), "[2, 0, 1, null]"), dict);
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_OK_AND_ASSIGN(auto last, arr.GetScalar(3));
+
+  ASSERT_TRUE(first->Equals(scalar_gamma));
+  ASSERT_TRUE(second->Equals(scalar_alpha));
+  ASSERT_TRUE(last->Equals(scalar_null));
+}
+
+TEST(TestSparseUnionScalar, Basics) {
+  auto ty = sparse_union({field("string", utf8()), field("number", uint64())});
+
+  auto alpha = MakeScalar("alpha");
+  auto beta = MakeScalar("beta");
+  ASSERT_OK_AND_ASSIGN(auto two, MakeScalar(uint64(), 2));
+
+  auto scalar_alpha = SparseUnionScalar(alpha, ty);
+  auto scalar_beta = SparseUnionScalar(beta, ty);
+  auto scalar_two = SparseUnionScalar(two, ty);
+
+  // test Array.GetScalar
+  std::vector> children{
+  ArrayFromJSON(utf8(), "[\"alpha\", \"\", \"beta\", null, \"gamma\"]"),
+  ArrayFromJSON(uint64(), "[1, 2, 11, 22, null]")};
+
+  auto type_ids = ArrayFromJSON(int8(), "[0, 1, 0, 0, 1]");
+  SparseUnionArray arr(ty, 5, children, type_ids->data()->buffers[1]);
+  ASSERT_OK(arr.ValidateFull());
+
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_TRUE(first->Equals(scalar_alpha));
+
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_TRUE(second->Equals(scalar_two));
+
+  ASSERT_OK_AND_ASSIGN(auto third, arr.GetScalar(2));
+  ASSERT_TRUE(third->Equals(scalar_beta));
+
+  ASSERT_OK_AND_ASSIGN(auto fourth, arr.GetScalar(3));
+  ASSERT_TRUE(fourth->Equals(MakeNullScalar(ty)));

Review comment:
   Hmm... interesting. Shouldn't it be `MakeNullScalar(utf8())`?

##
File path: cpp/src/arrow/scalar_test.cc
##
@@ -80,6 +84,15 @@ TYPED_TEST(TestNumericScalar, Basics) {
 
   auto dyn_null_value = MakeNullScalar(expected_type);
   ASSERT_EQ(*null_value, *dyn_null_value);
+
+  // test Array.GetScalar
+  auto arr = ArrayFromJSON(expected_type, "[null, 1, 2]");
+  ASSERT_OK_AND_ASSIGN(auto null, arr->GetScalar(0));
+  ASSERT_OK_AND_ASSIGN(auto one, arr->GetScalar(1));
+  ASSERT_OK_AND_ASSIGN(auto two, arr->GetScalar(2));
+  ASSERT_TRUE(null->Equals(*null_value));
+  ASSERT_TRUE(one->Equals(ScalarType(1)));
+  ASSERT_TRUE(two->Equals(ScalarType(2)));

Review comment:
   You never check that `Equals` returns false?

##
File path: cpp/src/arrow/scalar.cc
##
@@ -185,6 +192,35 @@ 
DictionaryScalar::DictionaryScalar(std::shared_ptr type)
 0)
 .ValueOrDie()} {}
 
+Result> DictionaryScalar::GetEncodedValue() const {
+  const auto& dict_type = checked_cast(*type);
+
+  if (!is_valid) {
+return MakeNullScalar(dict_type.value_type());
+  }
+
+  int64_t index_value = 0;
+  switch (dict_type.index_type()->id()) {
+case Type::INT8:
+  index_value = checked_cast(*value.index).value;
+  break;
+case Type::INT16:
+  index_value = checked_cast(*value.index).value;
+  break;
+case Type::INT32:
+  index_value = checked_cast(*value.index).value;
+  break;
+case Type::INT64:
+  index_value = checked_cast(*value.index).value;
+  break;
+default:
+  return Status::Invalid("Not implemented dictionary index type");

Review comment:
   `Status::TypeError`?

##
File path: cpp/src/arrow/scalar_test.cc
##
@@ -451,6 +565,127 @@ TEST(TestStructScalar, FieldAccess) {
 
   ASSERT_RAISES(Invalid, 

[GitHub] [arrow] pitrou commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-06-30 Thread GitBox


pitrou commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r447751371



##
File path: python/pyarrow/scalar.pxi
##
@@ -16,1198 +16,745 @@
 # under the License.
 
 
-_NULL = NA = None
+import collections
 
 
 cdef class Scalar:
 """
-The base class for all array elements.
+The base class for scalars.
 """
 
+def __init__(self):
+raise TypeError("Do not call {}'s constructor directly, use "
+"pa.scalar() instead.".format(self.__class__.__name__))
 
-cdef class NullType(Scalar):
-"""
-Singleton for null array elements.
-"""
-# TODO rename this NullValue?
+cdef void init(self, const shared_ptr[CScalar]& wrapped):
+self.wrapped = wrapped
 
-def __cinit__(self):
-global NA
-if NA is not None:
-raise Exception('Cannot create multiple NAType instances')
+@staticmethod
+cdef wrap(const shared_ptr[CScalar]& wrapped):
+cdef:
+Scalar self
+Type type_id = wrapped.get().type.get().id()
+
+if type_id == _Type_NA:
+return _NULL
+
+typ = _scalar_classes[type_id]
+self = typ.__new__(typ)
+self.init(wrapped)
+
+return self
+
+cdef inline shared_ptr[CScalar] unwrap(self) nogil:
+return self.wrapped
+
+@property
+def type(self):
+return pyarrow_wrap_data_type(self.wrapped.get().type)
 
-self.type = null()
+@property
+def is_valid(self):
+return self.wrapped.get().is_valid
 
 def __repr__(self):
-return 'NULL'
+return ''.format(
+self.__class__.__name__, self.as_py()
+)
 
-def as_py(self):
-"""
-Return None
-"""
-return None
+def __str__(self):
+return str(self.as_py())
+
+def equals(self, Scalar other):
+return self.wrapped.get().Equals(other.unwrap().get()[0])
 
 def __eq__(self, other):
-return NA
+try:
+if not isinstance(other, Scalar):
+other = scalar(other, type=self.type)
+return self.equals(other)
+except (TypeError, ValueError, ArrowInvalid):
+return NotImplemented
+
+def __hash__(self):
+cdef CScalarHash hasher
+return hasher(self.wrapped)
+
+def as_py(self):
+raise NotImplementedError()
 
 
-_NULL = NA = NullType()
+_NULL = NA = None
 
 
-cdef class ArrayValue(Scalar):
+cdef class NullScalar(Scalar):
 """
-The base class for non-null array elements.
+Concrete class for null scalars.
 """
 
-def __init__(self):
-raise TypeError("Do not call {}'s constructor directly, use array "
-"subscription instead."
-.format(self.__class__.__name__))
+def __cinit__(self):
+global NA
+if NA is not None:
+raise Exception('Cannot create multiple NAType instances')
+self.init(shared_ptr[CScalar](new CNullScalar()))
 
-cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-   int64_t index):
-self.type = type
-self.index = index
-self._set_array(sp_array)
+def __init__(self):
+pass
 
-cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-self.sp_array = sp_array
+def __eq__(self, other):
+return NA
 
-def __repr__(self):
-if hasattr(self, 'as_py'):
-return repr(self.as_py())
-else:
-return super(Scalar, self).__repr__()
+def __hash__(self):
+cdef CScalarHash hasher
+return hasher(self.wrapped)
 
-def __str__(self):
-if hasattr(self, 'as_py'):
-return str(self.as_py())
-else:
-return super(Scalar, self).__str__()
+def as_py(self):
+"""
+Return this value as a Python None.
+"""
+return None
 
-def __eq__(self, other):
-if hasattr(self, 'as_py'):
-if isinstance(other, ArrayValue):
-other = other.as_py()
-return self.as_py() == other
-else:
-raise NotImplementedError(
-"Cannot compare Arrow values that don't support as_py()")
 
-def __hash__(self):
-return hash(self.as_py())
+_NULL = NA = NullScalar()
 
 
-cdef class BooleanValue(ArrayValue):
+cdef class BooleanScalar(Scalar):
 """
-Concrete class for boolean array elements.
+Concrete class for boolean scalars.
 """
 
 def as_py(self):
 """
 Return this value as a Python bool.
 """
-cdef CBooleanArray* ap =  self.sp_array.get()
-return ap.Value(self.index)
+cdef CBooleanScalar* sp =  self.wrapped.get()
+return sp.value if sp.is_valid else None
 
 
-cdef class Int8Value(ArrayValue):
+cdef class UInt8Scalar(Scalar):
 """
-   

[GitHub] [arrow] pitrou commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-06-30 Thread GitBox


pitrou commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r447732301



##
File path: python/pyarrow/scalar.pxi
##
@@ -16,1198 +16,745 @@
 # under the License.
 
 
-_NULL = NA = None
+import collections
 
 
 cdef class Scalar:
 """
-The base class for all array elements.
+The base class for scalars.
 """
 
+def __init__(self):
+raise TypeError("Do not call {}'s constructor directly, use "
+"pa.scalar() instead.".format(self.__class__.__name__))
 
-cdef class NullType(Scalar):
-"""
-Singleton for null array elements.
-"""
-# TODO rename this NullValue?
+cdef void init(self, const shared_ptr[CScalar]& wrapped):
+self.wrapped = wrapped
 
-def __cinit__(self):
-global NA
-if NA is not None:
-raise Exception('Cannot create multiple NAType instances')
+@staticmethod
+cdef wrap(const shared_ptr[CScalar]& wrapped):
+cdef:
+Scalar self
+Type type_id = wrapped.get().type.get().id()
+
+if type_id == _Type_NA:
+return _NULL
+
+typ = _scalar_classes[type_id]
+self = typ.__new__(typ)
+self.init(wrapped)
+
+return self
+
+cdef inline shared_ptr[CScalar] unwrap(self) nogil:
+return self.wrapped
+
+@property
+def type(self):
+return pyarrow_wrap_data_type(self.wrapped.get().type)
 
-self.type = null()
+@property
+def is_valid(self):
+return self.wrapped.get().is_valid
 
 def __repr__(self):
-return 'NULL'
+return ''.format(
+self.__class__.__name__, self.as_py()
+)
 
-def as_py(self):
-"""
-Return None
-"""
-return None
+def __str__(self):
+return str(self.as_py())
+
+def equals(self, Scalar other):
+return self.wrapped.get().Equals(other.unwrap().get()[0])
 
 def __eq__(self, other):
-return NA
+try:
+if not isinstance(other, Scalar):
+other = scalar(other, type=self.type)
+return self.equals(other)
+except (TypeError, ValueError, ArrowInvalid):
+return NotImplemented
+
+def __hash__(self):
+cdef CScalarHash hasher
+return hasher(self.wrapped)
+
+def as_py(self):
+raise NotImplementedError()
 
 
-_NULL = NA = NullType()
+_NULL = NA = None
 
 
-cdef class ArrayValue(Scalar):
+cdef class NullScalar(Scalar):
 """
-The base class for non-null array elements.
+Concrete class for null scalars.
 """
 
-def __init__(self):
-raise TypeError("Do not call {}'s constructor directly, use array "
-"subscription instead."
-.format(self.__class__.__name__))
+def __cinit__(self):
+global NA
+if NA is not None:
+raise Exception('Cannot create multiple NAType instances')
+self.init(shared_ptr[CScalar](new CNullScalar()))
 
-cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-   int64_t index):
-self.type = type
-self.index = index
-self._set_array(sp_array)
+def __init__(self):
+pass
 
-cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-self.sp_array = sp_array
+def __eq__(self, other):
+return NA

Review comment:
   Well, it should probably return `False` for non-nulls?





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org