[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-06 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r450383737



##
File path: cpp/src/arrow/scalar_test.cc
##
@@ -451,6 +565,127 @@ TEST(TestStructScalar, FieldAccess) {
 
   ASSERT_RAISES(Invalid, abc.field(5).status());
   ASSERT_RAISES(Invalid, abc.field("c").status());
+
+  ASSERT_OK_AND_ASSIGN(auto d, abc.field("d"));
+  ASSERT_TRUE(d->Equals(MakeNullScalar(int64(;
+}
+
+TEST(TestDictionaryScalar, Basics) {
+  auto ty = dictionary(int8(), utf8());
+  auto dict = ArrayFromJSON(utf8(), "[\"alpha\", \"beta\", \"gamma\"]");
+
+  DictionaryScalar::ValueType alpha;
+  ASSERT_OK_AND_ASSIGN(alpha.index, MakeScalar(int8(), 0));
+  alpha.dictionary = dict;
+
+  DictionaryScalar::ValueType gamma;
+  ASSERT_OK_AND_ASSIGN(gamma.index, MakeScalar(int8(), 2));
+  gamma.dictionary = dict;
+
+  auto scalar_null = MakeNullScalar(ty);
+  auto scalar_alpha = DictionaryScalar(alpha, ty);
+  auto scalar_gamma = DictionaryScalar(gamma, ty);
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_null,
+  checked_cast(*scalar_null).GetEncodedValue());
+  ASSERT_TRUE(encoded_null->Equals(MakeNullScalar(utf8(;
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_alpha,
+  checked_cast(scalar_alpha).GetEncodedValue());
+  ASSERT_TRUE(encoded_alpha->Equals(MakeScalar("alpha")));
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_gamma,
+  checked_cast(scalar_gamma).GetEncodedValue());
+  ASSERT_TRUE(encoded_gamma->Equals(MakeScalar("gamma")));
+
+  // test Array.GetScalar
+  DictionaryArray arr(ty, ArrayFromJSON(int8(), "[2, 0, 1, null]"), dict);
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_OK_AND_ASSIGN(auto last, arr.GetScalar(3));
+
+  ASSERT_TRUE(first->Equals(scalar_gamma));
+  ASSERT_TRUE(second->Equals(scalar_alpha));
+  ASSERT_TRUE(last->Equals(scalar_null));
+}
+
+TEST(TestSparseUnionScalar, Basics) {
+  auto ty = sparse_union({field("string", utf8()), field("number", uint64())});
+
+  auto alpha = MakeScalar("alpha");
+  auto beta = MakeScalar("beta");
+  ASSERT_OK_AND_ASSIGN(auto two, MakeScalar(uint64(), 2));
+
+  auto scalar_alpha = SparseUnionScalar(alpha, ty);
+  auto scalar_beta = SparseUnionScalar(beta, ty);
+  auto scalar_two = SparseUnionScalar(two, ty);
+
+  // test Array.GetScalar
+  std::vector> children{
+  ArrayFromJSON(utf8(), "[\"alpha\", \"\", \"beta\", null, \"gamma\"]"),
+  ArrayFromJSON(uint64(), "[1, 2, 11, 22, null]")};
+
+  auto type_ids = ArrayFromJSON(int8(), "[0, 1, 0, 0, 1]");
+  SparseUnionArray arr(ty, 5, children, type_ids->data()->buffers[1]);
+  ASSERT_OK(arr.ValidateFull());
+
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_TRUE(first->Equals(scalar_alpha));
+
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_TRUE(second->Equals(scalar_two));
+
+  ASSERT_OK_AND_ASSIGN(auto third, arr.GetScalar(2));
+  ASSERT_TRUE(third->Equals(scalar_beta));
+
+  ASSERT_OK_AND_ASSIGN(auto fourth, arr.GetScalar(3));
+  ASSERT_TRUE(fourth->Equals(MakeNullScalar(ty)));

Review comment:
   We can ensure to have an explicit type to the value scalar from 
`array.GetScalar()` but not when a null union scalar is directly constructed, 
so `MakeNullScalar(union_type)` won't know which type to choose for the 
underlying value scalar.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-06 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r450344074



##
File path: cpp/src/arrow/scalar_test.cc
##
@@ -451,6 +565,127 @@ TEST(TestStructScalar, FieldAccess) {
 
   ASSERT_RAISES(Invalid, abc.field(5).status());
   ASSERT_RAISES(Invalid, abc.field("c").status());
+
+  ASSERT_OK_AND_ASSIGN(auto d, abc.field("d"));
+  ASSERT_TRUE(d->Equals(MakeNullScalar(int64(;
+}
+
+TEST(TestDictionaryScalar, Basics) {
+  auto ty = dictionary(int8(), utf8());
+  auto dict = ArrayFromJSON(utf8(), "[\"alpha\", \"beta\", \"gamma\"]");
+
+  DictionaryScalar::ValueType alpha;
+  ASSERT_OK_AND_ASSIGN(alpha.index, MakeScalar(int8(), 0));
+  alpha.dictionary = dict;
+
+  DictionaryScalar::ValueType gamma;
+  ASSERT_OK_AND_ASSIGN(gamma.index, MakeScalar(int8(), 2));
+  gamma.dictionary = dict;
+
+  auto scalar_null = MakeNullScalar(ty);
+  auto scalar_alpha = DictionaryScalar(alpha, ty);
+  auto scalar_gamma = DictionaryScalar(gamma, ty);
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_null,
+  checked_cast(*scalar_null).GetEncodedValue());
+  ASSERT_TRUE(encoded_null->Equals(MakeNullScalar(utf8(;
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_alpha,
+  checked_cast(scalar_alpha).GetEncodedValue());
+  ASSERT_TRUE(encoded_alpha->Equals(MakeScalar("alpha")));
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_gamma,
+  checked_cast(scalar_gamma).GetEncodedValue());
+  ASSERT_TRUE(encoded_gamma->Equals(MakeScalar("gamma")));
+
+  // test Array.GetScalar
+  DictionaryArray arr(ty, ArrayFromJSON(int8(), "[2, 0, 1, null]"), dict);
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_OK_AND_ASSIGN(auto last, arr.GetScalar(3));
+
+  ASSERT_TRUE(first->Equals(scalar_gamma));
+  ASSERT_TRUE(second->Equals(scalar_alpha));
+  ASSERT_TRUE(last->Equals(scalar_null));
+}
+
+TEST(TestSparseUnionScalar, Basics) {
+  auto ty = sparse_union({field("string", utf8()), field("number", uint64())});
+
+  auto alpha = MakeScalar("alpha");
+  auto beta = MakeScalar("beta");
+  ASSERT_OK_AND_ASSIGN(auto two, MakeScalar(uint64(), 2));
+
+  auto scalar_alpha = SparseUnionScalar(alpha, ty);
+  auto scalar_beta = SparseUnionScalar(beta, ty);
+  auto scalar_two = SparseUnionScalar(two, ty);
+
+  // test Array.GetScalar
+  std::vector> children{
+  ArrayFromJSON(utf8(), "[\"alpha\", \"\", \"beta\", null, \"gamma\"]"),
+  ArrayFromJSON(uint64(), "[1, 2, 11, 22, null]")};
+
+  auto type_ids = ArrayFromJSON(int8(), "[0, 1, 0, 0, 1]");
+  SparseUnionArray arr(ty, 5, children, type_ids->data()->buffers[1]);
+  ASSERT_OK(arr.ValidateFull());
+
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_TRUE(first->Equals(scalar_alpha));
+
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_TRUE(second->Equals(scalar_two));
+
+  ASSERT_OK_AND_ASSIGN(auto third, arr.GetScalar(2));
+  ASSERT_TRUE(third->Equals(scalar_beta));
+
+  ASSERT_OK_AND_ASSIGN(auto fourth, arr.GetScalar(3));
+  ASSERT_TRUE(fourth->Equals(MakeNullScalar(ty)));

Review comment:
   So for UnionScalar we have two `is_valid` flags, une for the union 
scalar and one for the underlying value scalar. Currently `MakeNullScalar()` 
sets the validity flag for the union scalar and leaves the value scalar 
uninitialized. 
   
   What do you think, shall we initialize the value scalar as well?





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-06 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r450332885



##
File path: cpp/src/arrow/scalar_test.cc
##
@@ -451,6 +565,127 @@ TEST(TestStructScalar, FieldAccess) {
 
   ASSERT_RAISES(Invalid, abc.field(5).status());
   ASSERT_RAISES(Invalid, abc.field("c").status());
+
+  ASSERT_OK_AND_ASSIGN(auto d, abc.field("d"));
+  ASSERT_TRUE(d->Equals(MakeNullScalar(int64(;
+}
+
+TEST(TestDictionaryScalar, Basics) {
+  auto ty = dictionary(int8(), utf8());
+  auto dict = ArrayFromJSON(utf8(), "[\"alpha\", \"beta\", \"gamma\"]");
+
+  DictionaryScalar::ValueType alpha;
+  ASSERT_OK_AND_ASSIGN(alpha.index, MakeScalar(int8(), 0));
+  alpha.dictionary = dict;
+
+  DictionaryScalar::ValueType gamma;
+  ASSERT_OK_AND_ASSIGN(gamma.index, MakeScalar(int8(), 2));
+  gamma.dictionary = dict;
+
+  auto scalar_null = MakeNullScalar(ty);
+  auto scalar_alpha = DictionaryScalar(alpha, ty);
+  auto scalar_gamma = DictionaryScalar(gamma, ty);
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_null,
+  checked_cast(*scalar_null).GetEncodedValue());
+  ASSERT_TRUE(encoded_null->Equals(MakeNullScalar(utf8(;
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_alpha,
+  checked_cast(scalar_alpha).GetEncodedValue());
+  ASSERT_TRUE(encoded_alpha->Equals(MakeScalar("alpha")));
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_gamma,
+  checked_cast(scalar_gamma).GetEncodedValue());
+  ASSERT_TRUE(encoded_gamma->Equals(MakeScalar("gamma")));
+
+  // test Array.GetScalar
+  DictionaryArray arr(ty, ArrayFromJSON(int8(), "[2, 0, 1, null]"), dict);
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_OK_AND_ASSIGN(auto last, arr.GetScalar(3));
+
+  ASSERT_TRUE(first->Equals(scalar_gamma));
+  ASSERT_TRUE(second->Equals(scalar_alpha));
+  ASSERT_TRUE(last->Equals(scalar_null));
+}
+
+TEST(TestSparseUnionScalar, Basics) {
+  auto ty = sparse_union({field("string", utf8()), field("number", uint64())});
+
+  auto alpha = MakeScalar("alpha");
+  auto beta = MakeScalar("beta");
+  ASSERT_OK_AND_ASSIGN(auto two, MakeScalar(uint64(), 2));
+
+  auto scalar_alpha = SparseUnionScalar(alpha, ty);
+  auto scalar_beta = SparseUnionScalar(beta, ty);
+  auto scalar_two = SparseUnionScalar(two, ty);
+
+  // test Array.GetScalar
+  std::vector> children{
+  ArrayFromJSON(utf8(), "[\"alpha\", \"\", \"beta\", null, \"gamma\"]"),
+  ArrayFromJSON(uint64(), "[1, 2, 11, 22, null]")};
+
+  auto type_ids = ArrayFromJSON(int8(), "[0, 1, 0, 0, 1]");
+  SparseUnionArray arr(ty, 5, children, type_ids->data()->buffers[1]);
+  ASSERT_OK(arr.ValidateFull());
+
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_TRUE(first->Equals(scalar_alpha));
+
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_TRUE(second->Equals(scalar_two));
+
+  ASSERT_OK_AND_ASSIGN(auto third, arr.GetScalar(2));
+  ASSERT_TRUE(third->Equals(scalar_beta));
+
+  ASSERT_OK_AND_ASSIGN(auto fourth, arr.GetScalar(3));
+  ASSERT_TRUE(fourth->Equals(MakeNullScalar(ty)));

Review comment:
   Adding.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-06 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r450327095



##
File path: cpp/src/arrow/scalar_test.cc
##
@@ -451,6 +565,127 @@ TEST(TestStructScalar, FieldAccess) {
 
   ASSERT_RAISES(Invalid, abc.field(5).status());
   ASSERT_RAISES(Invalid, abc.field("c").status());
+
+  ASSERT_OK_AND_ASSIGN(auto d, abc.field("d"));
+  ASSERT_TRUE(d->Equals(MakeNullScalar(int64(;
+}
+
+TEST(TestDictionaryScalar, Basics) {
+  auto ty = dictionary(int8(), utf8());
+  auto dict = ArrayFromJSON(utf8(), "[\"alpha\", \"beta\", \"gamma\"]");
+
+  DictionaryScalar::ValueType alpha;
+  ASSERT_OK_AND_ASSIGN(alpha.index, MakeScalar(int8(), 0));
+  alpha.dictionary = dict;
+
+  DictionaryScalar::ValueType gamma;
+  ASSERT_OK_AND_ASSIGN(gamma.index, MakeScalar(int8(), 2));
+  gamma.dictionary = dict;
+
+  auto scalar_null = MakeNullScalar(ty);
+  auto scalar_alpha = DictionaryScalar(alpha, ty);
+  auto scalar_gamma = DictionaryScalar(gamma, ty);
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_null,
+  checked_cast(*scalar_null).GetEncodedValue());
+  ASSERT_TRUE(encoded_null->Equals(MakeNullScalar(utf8(;
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_alpha,
+  checked_cast(scalar_alpha).GetEncodedValue());
+  ASSERT_TRUE(encoded_alpha->Equals(MakeScalar("alpha")));
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_gamma,
+  checked_cast(scalar_gamma).GetEncodedValue());
+  ASSERT_TRUE(encoded_gamma->Equals(MakeScalar("gamma")));
+
+  // test Array.GetScalar
+  DictionaryArray arr(ty, ArrayFromJSON(int8(), "[2, 0, 1, null]"), dict);
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_OK_AND_ASSIGN(auto last, arr.GetScalar(3));
+
+  ASSERT_TRUE(first->Equals(scalar_gamma));
+  ASSERT_TRUE(second->Equals(scalar_alpha));
+  ASSERT_TRUE(last->Equals(scalar_null));
+}
+
+TEST(TestSparseUnionScalar, Basics) {
+  auto ty = sparse_union({field("string", utf8()), field("number", uint64())});
+
+  auto alpha = MakeScalar("alpha");
+  auto beta = MakeScalar("beta");
+  ASSERT_OK_AND_ASSIGN(auto two, MakeScalar(uint64(), 2));
+
+  auto scalar_alpha = SparseUnionScalar(alpha, ty);
+  auto scalar_beta = SparseUnionScalar(beta, ty);
+  auto scalar_two = SparseUnionScalar(two, ty);
+
+  // test Array.GetScalar
+  std::vector> children{
+  ArrayFromJSON(utf8(), "[\"alpha\", \"\", \"beta\", null, \"gamma\"]"),
+  ArrayFromJSON(uint64(), "[1, 2, 11, 22, null]")};
+
+  auto type_ids = ArrayFromJSON(int8(), "[0, 1, 0, 0, 1]");
+  SparseUnionArray arr(ty, 5, children, type_ids->data()->buffers[1]);
+  ASSERT_OK(arr.ValidateFull());
+
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_TRUE(first->Equals(scalar_alpha));
+
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_TRUE(second->Equals(scalar_two));
+
+  ASSERT_OK_AND_ASSIGN(auto third, arr.GetScalar(2));
+  ASSERT_TRUE(third->Equals(scalar_beta));
+
+  ASSERT_OK_AND_ASSIGN(auto fourth, arr.GetScalar(3));
+  ASSERT_TRUE(fourth->Equals(MakeNullScalar(ty)));

Review comment:
   We have the value type in `UnionScalar.value.type`.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-06 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r450318972



##
File path: cpp/src/arrow/scalar.cc
##
@@ -185,6 +192,35 @@ 
DictionaryScalar::DictionaryScalar(std::shared_ptr type)
 0)
 .ValueOrDie()} {}
 
+Result> DictionaryScalar::GetEncodedValue() const {
+  const auto& dict_type = checked_cast(*type);
+
+  if (!is_valid) {
+return MakeNullScalar(dict_type.value_type());
+  }
+
+  int64_t index_value = 0;
+  switch (dict_type.index_type()->id()) {
+case Type::INT8:
+  index_value = checked_cast(*value.index).value;
+  break;
+case Type::INT16:
+  index_value = checked_cast(*value.index).value;
+  break;
+case Type::INT32:
+  index_value = checked_cast(*value.index).value;
+  break;
+case Type::INT64:
+  index_value = checked_cast(*value.index).value;
+  break;
+default:
+  return Status::Invalid("Not implemented dictionary index type");

Review comment:
   Updating.

##
File path: cpp/src/arrow/scalar_test.cc
##
@@ -80,6 +84,15 @@ TYPED_TEST(TestNumericScalar, Basics) {
 
   auto dyn_null_value = MakeNullScalar(expected_type);
   ASSERT_EQ(*null_value, *dyn_null_value);
+
+  // test Array.GetScalar
+  auto arr = ArrayFromJSON(expected_type, "[null, 1, 2]");
+  ASSERT_OK_AND_ASSIGN(auto null, arr->GetScalar(0));
+  ASSERT_OK_AND_ASSIGN(auto one, arr->GetScalar(1));
+  ASSERT_OK_AND_ASSIGN(auto two, arr->GetScalar(2));
+  ASSERT_TRUE(null->Equals(*null_value));
+  ASSERT_TRUE(one->Equals(ScalarType(1)));
+  ASSERT_TRUE(two->Equals(ScalarType(2)));

Review comment:
   Not yet, adding check for it.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-06 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r450317608



##
File path: cpp/src/arrow/scalar_test.cc
##
@@ -451,6 +565,127 @@ TEST(TestStructScalar, FieldAccess) {
 
   ASSERT_RAISES(Invalid, abc.field(5).status());
   ASSERT_RAISES(Invalid, abc.field("c").status());
+
+  ASSERT_OK_AND_ASSIGN(auto d, abc.field("d"));
+  ASSERT_TRUE(d->Equals(MakeNullScalar(int64(;
+}
+
+TEST(TestDictionaryScalar, Basics) {
+  auto ty = dictionary(int8(), utf8());
+  auto dict = ArrayFromJSON(utf8(), "[\"alpha\", \"beta\", \"gamma\"]");
+
+  DictionaryScalar::ValueType alpha;
+  ASSERT_OK_AND_ASSIGN(alpha.index, MakeScalar(int8(), 0));
+  alpha.dictionary = dict;
+
+  DictionaryScalar::ValueType gamma;
+  ASSERT_OK_AND_ASSIGN(gamma.index, MakeScalar(int8(), 2));
+  gamma.dictionary = dict;
+
+  auto scalar_null = MakeNullScalar(ty);
+  auto scalar_alpha = DictionaryScalar(alpha, ty);
+  auto scalar_gamma = DictionaryScalar(gamma, ty);
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_null,
+  checked_cast(*scalar_null).GetEncodedValue());
+  ASSERT_TRUE(encoded_null->Equals(MakeNullScalar(utf8(;
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_alpha,
+  checked_cast(scalar_alpha).GetEncodedValue());
+  ASSERT_TRUE(encoded_alpha->Equals(MakeScalar("alpha")));
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_gamma,
+  checked_cast(scalar_gamma).GetEncodedValue());
+  ASSERT_TRUE(encoded_gamma->Equals(MakeScalar("gamma")));
+
+  // test Array.GetScalar
+  DictionaryArray arr(ty, ArrayFromJSON(int8(), "[2, 0, 1, null]"), dict);
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_OK_AND_ASSIGN(auto last, arr.GetScalar(3));
+
+  ASSERT_TRUE(first->Equals(scalar_gamma));
+  ASSERT_TRUE(second->Equals(scalar_alpha));
+  ASSERT_TRUE(last->Equals(scalar_null));
+}
+
+TEST(TestSparseUnionScalar, Basics) {
+  auto ty = sparse_union({field("string", utf8()), field("number", uint64())});
+
+  auto alpha = MakeScalar("alpha");
+  auto beta = MakeScalar("beta");
+  ASSERT_OK_AND_ASSIGN(auto two, MakeScalar(uint64(), 2));
+
+  auto scalar_alpha = SparseUnionScalar(alpha, ty);
+  auto scalar_beta = SparseUnionScalar(beta, ty);
+  auto scalar_two = SparseUnionScalar(two, ty);
+
+  // test Array.GetScalar
+  std::vector> children{
+  ArrayFromJSON(utf8(), "[\"alpha\", \"\", \"beta\", null, \"gamma\"]"),
+  ArrayFromJSON(uint64(), "[1, 2, 11, 22, null]")};
+
+  auto type_ids = ArrayFromJSON(int8(), "[0, 1, 0, 0, 1]");
+  SparseUnionArray arr(ty, 5, children, type_ids->data()->buffers[1]);
+  ASSERT_OK(arr.ValidateFull());
+
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_TRUE(first->Equals(scalar_alpha));
+
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_TRUE(second->Equals(scalar_two));
+
+  ASSERT_OK_AND_ASSIGN(auto third, arr.GetScalar(2));
+  ASSERT_TRUE(third->Equals(scalar_beta));
+
+  ASSERT_OK_AND_ASSIGN(auto fourth, arr.GetScalar(3));
+  ASSERT_TRUE(fourth->Equals(MakeNullScalar(ty)));

Review comment:
   I think `GetScalar()` should return a Scalar with the same type as the 
Array, so in case of unions with the union type rather than the type selected 
by the union type id (the union scalar's value is another scalar with the type 
id).
   
   I don't have a strong opinion on this though.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-06 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r450317608



##
File path: cpp/src/arrow/scalar_test.cc
##
@@ -451,6 +565,127 @@ TEST(TestStructScalar, FieldAccess) {
 
   ASSERT_RAISES(Invalid, abc.field(5).status());
   ASSERT_RAISES(Invalid, abc.field("c").status());
+
+  ASSERT_OK_AND_ASSIGN(auto d, abc.field("d"));
+  ASSERT_TRUE(d->Equals(MakeNullScalar(int64(;
+}
+
+TEST(TestDictionaryScalar, Basics) {
+  auto ty = dictionary(int8(), utf8());
+  auto dict = ArrayFromJSON(utf8(), "[\"alpha\", \"beta\", \"gamma\"]");
+
+  DictionaryScalar::ValueType alpha;
+  ASSERT_OK_AND_ASSIGN(alpha.index, MakeScalar(int8(), 0));
+  alpha.dictionary = dict;
+
+  DictionaryScalar::ValueType gamma;
+  ASSERT_OK_AND_ASSIGN(gamma.index, MakeScalar(int8(), 2));
+  gamma.dictionary = dict;
+
+  auto scalar_null = MakeNullScalar(ty);
+  auto scalar_alpha = DictionaryScalar(alpha, ty);
+  auto scalar_gamma = DictionaryScalar(gamma, ty);
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_null,
+  checked_cast(*scalar_null).GetEncodedValue());
+  ASSERT_TRUE(encoded_null->Equals(MakeNullScalar(utf8(;
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_alpha,
+  checked_cast(scalar_alpha).GetEncodedValue());
+  ASSERT_TRUE(encoded_alpha->Equals(MakeScalar("alpha")));
+
+  ASSERT_OK_AND_ASSIGN(
+  auto encoded_gamma,
+  checked_cast(scalar_gamma).GetEncodedValue());
+  ASSERT_TRUE(encoded_gamma->Equals(MakeScalar("gamma")));
+
+  // test Array.GetScalar
+  DictionaryArray arr(ty, ArrayFromJSON(int8(), "[2, 0, 1, null]"), dict);
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_OK_AND_ASSIGN(auto last, arr.GetScalar(3));
+
+  ASSERT_TRUE(first->Equals(scalar_gamma));
+  ASSERT_TRUE(second->Equals(scalar_alpha));
+  ASSERT_TRUE(last->Equals(scalar_null));
+}
+
+TEST(TestSparseUnionScalar, Basics) {
+  auto ty = sparse_union({field("string", utf8()), field("number", uint64())});
+
+  auto alpha = MakeScalar("alpha");
+  auto beta = MakeScalar("beta");
+  ASSERT_OK_AND_ASSIGN(auto two, MakeScalar(uint64(), 2));
+
+  auto scalar_alpha = SparseUnionScalar(alpha, ty);
+  auto scalar_beta = SparseUnionScalar(beta, ty);
+  auto scalar_two = SparseUnionScalar(two, ty);
+
+  // test Array.GetScalar
+  std::vector> children{
+  ArrayFromJSON(utf8(), "[\"alpha\", \"\", \"beta\", null, \"gamma\"]"),
+  ArrayFromJSON(uint64(), "[1, 2, 11, 22, null]")};
+
+  auto type_ids = ArrayFromJSON(int8(), "[0, 1, 0, 0, 1]");
+  SparseUnionArray arr(ty, 5, children, type_ids->data()->buffers[1]);
+  ASSERT_OK(arr.ValidateFull());
+
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_TRUE(first->Equals(scalar_alpha));
+
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_TRUE(second->Equals(scalar_two));
+
+  ASSERT_OK_AND_ASSIGN(auto third, arr.GetScalar(2));
+  ASSERT_TRUE(third->Equals(scalar_beta));
+
+  ASSERT_OK_AND_ASSIGN(auto fourth, arr.GetScalar(3));
+  ASSERT_TRUE(fourth->Equals(MakeNullScalar(ty)));

Review comment:
   I think `GetScalar()` should return a Scalar with the same type as the 
Array. 





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-03 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r449512190



##
File path: python/pyarrow/tests/test_array.py
##
@@ -583,12 +584,14 @@ def test_dictionary_from_numpy():
 assert d2.dictionary.to_pylist() == dictionary.tolist()
 
 for i in range(len(indices)):
+assert d1[i].dictionary == dictionary
 assert d1[i].as_py() == dictionary[indices[i]]
 
-if mask[i]:
-assert d2[i] is pa.NULL
-else:
-assert d2[i].as_py() == dictionary[indices[i]]
+print(i, d2[i])
+# if mask[i]:
+# assert d2[i].as_py() is None
+# else:
+# assert d2[i].as_py() == dictionary[indices[i]]

Review comment:
   Fixed.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-01 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r448620980



##
File path: python/pyarrow/scalar.pxi
##
@@ -16,1198 +16,745 @@
 # under the License.
 
 
-_NULL = NA = None
+import collections
 
 
 cdef class Scalar:
 """
-The base class for all array elements.
+The base class for scalars.
 """
 
+def __init__(self):
+raise TypeError("Do not call {}'s constructor directly, use "
+"pa.scalar() instead.".format(self.__class__.__name__))
 
-cdef class NullType(Scalar):
-"""
-Singleton for null array elements.
-"""
-# TODO rename this NullValue?
+cdef void init(self, const shared_ptr[CScalar]& wrapped):
+self.wrapped = wrapped
 
-def __cinit__(self):
-global NA
-if NA is not None:
-raise Exception('Cannot create multiple NAType instances')
+@staticmethod
+cdef wrap(const shared_ptr[CScalar]& wrapped):
+cdef:
+Scalar self
+Type type_id = wrapped.get().type.get().id()
+
+if type_id == _Type_NA:
+return _NULL
+
+typ = _scalar_classes[type_id]
+self = typ.__new__(typ)
+self.init(wrapped)
+
+return self
+
+cdef inline shared_ptr[CScalar] unwrap(self) nogil:
+return self.wrapped
+
+@property
+def type(self):
+return pyarrow_wrap_data_type(self.wrapped.get().type)
 
-self.type = null()
+@property
+def is_valid(self):
+return self.wrapped.get().is_valid
 
 def __repr__(self):
-return 'NULL'
+return ''.format(
+self.__class__.__name__, self.as_py()
+)
 
-def as_py(self):
-"""
-Return None
-"""
-return None
+def __str__(self):
+return str(self.as_py())
+
+def equals(self, Scalar other):
+return self.wrapped.get().Equals(other.unwrap().get()[0])
 
 def __eq__(self, other):
-return NA
+try:
+if not isinstance(other, Scalar):
+other = scalar(other, type=self.type)
+return self.equals(other)
+except (TypeError, ValueError, ArrowInvalid):
+return NotImplemented
+
+def __hash__(self):
+cdef CScalarHash hasher
+return hasher(self.wrapped)
+
+def as_py(self):
+raise NotImplementedError()
 
 
-_NULL = NA = NullType()
+_NULL = NA = None
 
 
-cdef class ArrayValue(Scalar):
+cdef class NullScalar(Scalar):
 """
-The base class for non-null array elements.
+Concrete class for null scalars.
 """
 
-def __init__(self):
-raise TypeError("Do not call {}'s constructor directly, use array "
-"subscription instead."
-.format(self.__class__.__name__))
+def __cinit__(self):
+global NA
+if NA is not None:
+raise Exception('Cannot create multiple NAType instances')
+self.init(shared_ptr[CScalar](new CNullScalar()))
 
-cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-   int64_t index):
-self.type = type
-self.index = index
-self._set_array(sp_array)
+def __init__(self):
+pass
 
-cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-self.sp_array = sp_array
+def __eq__(self, other):
+return NA

Review comment:
   Indeed, the c++ impl should return the same.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-01 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r448589973



##
File path: python/pyarrow/scalar.pxi
##
@@ -1217,21 +764,50 @@ cdef dict _scalar_classes = {
 _Type_INT16: Int16Scalar,
 _Type_INT32: Int32Scalar,
 _Type_INT64: Int64Scalar,
+_Type_HALF_FLOAT: HalfFloatScalar,
 _Type_FLOAT: FloatScalar,
 _Type_DOUBLE: DoubleScalar,
+_Type_DECIMAL: Decimal128Scalar,
+_Type_DATE32: Date32Scalar,
+_Type_DATE64: Date64Scalar,
+_Type_TIME32: Time32Scalar,
+_Type_TIME64: Time64Scalar,
+_Type_TIMESTAMP: TimestampScalar,
+_Type_DURATION: DurationScalar,
+_Type_BINARY: BinaryScalar,
+_Type_LARGE_BINARY: LargeBinaryScalar,
+_Type_FIXED_SIZE_BINARY: FixedSizeBinaryScalar,
 _Type_STRING: StringScalar,
+_Type_LARGE_STRING: LargeStringScalar,
+_Type_LIST: ListScalar,
+_Type_LARGE_LIST: LargeListScalar,
+_Type_FIXED_SIZE_LIST: FixedSizeListScalar,
+_Type_STRUCT: StructScalar,
+_Type_MAP: MapScalar,
+_Type_DICTIONARY: DictionaryScalar,
+_Type_SPARSE_UNION: UnionScalar,
+_Type_DENSE_UNION: UnionScalar,
 }
 
-cdef object box_scalar(DataType type, const shared_ptr[CArray]& sp_array,
-   int64_t index):
-cdef ArrayValue value
 
-if type.type.id() == _Type_NA:
-return _NULL
-elif sp_array.get().IsNull(index):
-return _NULL
-else:
-klass = _array_value_classes[type.type.id()]
-value = klass.__new__(klass)
-value.init(type, sp_array, index)
-return value
+def scalar(value, DataType type=None, bint safe=True,

Review comment:
   Removed.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-01 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r448589857



##
File path: python/pyarrow/scalar.pxi
##
@@ -1217,21 +764,50 @@ cdef dict _scalar_classes = {
 _Type_INT16: Int16Scalar,
 _Type_INT32: Int32Scalar,
 _Type_INT64: Int64Scalar,
+_Type_HALF_FLOAT: HalfFloatScalar,
 _Type_FLOAT: FloatScalar,
 _Type_DOUBLE: DoubleScalar,
+_Type_DECIMAL: Decimal128Scalar,
+_Type_DATE32: Date32Scalar,
+_Type_DATE64: Date64Scalar,
+_Type_TIME32: Time32Scalar,
+_Type_TIME64: Time64Scalar,
+_Type_TIMESTAMP: TimestampScalar,
+_Type_DURATION: DurationScalar,
+_Type_BINARY: BinaryScalar,
+_Type_LARGE_BINARY: LargeBinaryScalar,
+_Type_FIXED_SIZE_BINARY: FixedSizeBinaryScalar,
 _Type_STRING: StringScalar,
+_Type_LARGE_STRING: LargeStringScalar,
+_Type_LIST: ListScalar,
+_Type_LARGE_LIST: LargeListScalar,
+_Type_FIXED_SIZE_LIST: FixedSizeListScalar,
+_Type_STRUCT: StructScalar,
+_Type_MAP: MapScalar,
+_Type_DICTIONARY: DictionaryScalar,
+_Type_SPARSE_UNION: UnionScalar,
+_Type_DENSE_UNION: UnionScalar,
 }
 
-cdef object box_scalar(DataType type, const shared_ptr[CArray]& sp_array,
-   int64_t index):
-cdef ArrayValue value
 
-if type.type.id() == _Type_NA:
-return _NULL
-elif sp_array.get().IsNull(index):
-return _NULL
-else:
-klass = _array_value_classes[type.type.id()]
-value = klass.__new__(klass)
-value.init(type, sp_array, index)
-return value
+def scalar(value, DataType type=None, bint safe=True,
+   MemoryPool memory_pool=None):
+cdef:
+PyConversionOptions options
+shared_ptr[CScalar] scalar
+shared_ptr[CArray] array
+shared_ptr[CChunkedArray] chunked
+
+options.size = 1
+options.pool = maybe_unbox_memory_pool(memory_pool)
+# options.from_pandas = from_pandas

Review comment:
   Implemented it.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-01 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r448560952



##
File path: python/pyarrow/scalar.pxi
##
@@ -1217,21 +767,95 @@ cdef dict _scalar_classes = {
 _Type_INT16: Int16Scalar,
 _Type_INT32: Int32Scalar,
 _Type_INT64: Int64Scalar,
+_Type_HALF_FLOAT: HalfFloatScalar,
 _Type_FLOAT: FloatScalar,
 _Type_DOUBLE: DoubleScalar,
+_Type_DECIMAL: Decimal128Scalar,
+_Type_DATE32: Date32Scalar,
+_Type_DATE64: Date64Scalar,
+_Type_TIME32: Time32Scalar,
+_Type_TIME64: Time64Scalar,
+_Type_TIMESTAMP: TimestampScalar,
+_Type_DURATION: DurationScalar,
+_Type_BINARY: BinaryScalar,
+_Type_LARGE_BINARY: LargeBinaryScalar,
+_Type_FIXED_SIZE_BINARY: FixedSizeBinaryScalar,
 _Type_STRING: StringScalar,
+_Type_LARGE_STRING: LargeStringScalar,
+_Type_LIST: ListScalar,
+_Type_LARGE_LIST: LargeListScalar,
+_Type_FIXED_SIZE_LIST: FixedSizeListScalar,
+_Type_STRUCT: StructScalar,
+_Type_MAP: MapScalar,
+_Type_DICTIONARY: DictionaryScalar,
+_Type_SPARSE_UNION: UnionScalar,
+_Type_DENSE_UNION: UnionScalar,
 }
 
-cdef object box_scalar(DataType type, const shared_ptr[CArray]& sp_array,
-   int64_t index):
-cdef ArrayValue value
 
-if type.type.id() == _Type_NA:
-return _NULL
-elif sp_array.get().IsNull(index):
-return _NULL
-else:
-klass = _array_value_classes[type.type.id()]
-value = klass.__new__(klass)
-value.init(type, sp_array, index)
-return value
+def scalar(value, DataType type=None, *, from_pandas=None,
+   MemoryPool memory_pool=None):
+"""
+Create a pyarrow.Scalar instance from a Python object.
+
+Parameters
+--
+value : Any
+Python object coercible to arrow's type system.
+type : pyarrow.DataType
+Explicit type to attempt to coerce to, otherwise will be inferred from
+the value.
+from_pandas : bool, default None
+Use pandas's semantics for inferring nulls from values in
+ndarray-like data. Defaults to False if not passed explicitly by user,
+or True if a pandas object is passed in.
+memory_pool : pyarrow.MemoryPool, optional
+If not passed, will allocate memory from the currently-set default
+memory pool.
+
+Returns
+---
+scalar : pyarrow.Scalar
+
+Notes
+-
+Localized timestamps will currently be returned as UTC (pandas's native
+representation). Timezone-naive data will be implicitly interpreted as
+UTC.

Review comment:
   Removing. Copied from `pa.array()`'s docstring, so we mey need to update 
it there as well.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-01 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r448560164



##
File path: python/pyarrow/scalar.pxi
##
@@ -16,1198 +16,748 @@
 # under the License.
 
 
-_NULL = NA = None
+import collections
 
 
 cdef class Scalar:
 """
-The base class for all array elements.
-"""
-
-
-cdef class NullType(Scalar):
-"""
-Singleton for null array elements.
+The base class for scalars.
 """
-# TODO rename this NullValue?
-
-def __cinit__(self):
-global NA
-if NA is not None:
-raise Exception('Cannot create multiple NAType instances')
-
-self.type = null()
-
-def __repr__(self):
-return 'NULL'
 
-def as_py(self):
-"""
-Return None
-"""
-return None
+def __init__(self):
+raise TypeError("Do not call {}'s constructor directly, use "
+"pa.scalar() instead.".format(self.__class__.__name__))
 
-def __eq__(self, other):
-return NA
+cdef void init(self, const shared_ptr[CScalar]& wrapped):
+self.wrapped = wrapped
 
+@staticmethod
+cdef wrap(const shared_ptr[CScalar]& wrapped):
+cdef:
+Scalar self
+Type type_id = wrapped.get().type.get().id()
 
-_NULL = NA = NullType()
+if type_id == _Type_NA:
+return _NULL
 
+typ = _scalar_classes[type_id]
+self = typ.__new__(typ)
+self.init(wrapped)
 
-cdef class ArrayValue(Scalar):
-"""
-The base class for non-null array elements.
-"""
+return self
 
-def __init__(self):
-raise TypeError("Do not call {}'s constructor directly, use array "
-"subscription instead."
-.format(self.__class__.__name__))
+cdef inline shared_ptr[CScalar] unwrap(self) nogil:
+return self.wrapped
 
-cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-   int64_t index):
-self.type = type
-self.index = index
-self._set_array(sp_array)
+@property
+def type(self):
+"""
+Data type of the Scalar object.
+"""
+return pyarrow_wrap_data_type(self.wrapped.get().type)
 
-cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-self.sp_array = sp_array
+@property
+def is_valid(self):
+"""
+Holds a valid (non-null) value.
+"""
+return self.wrapped.get().is_valid
 
 def __repr__(self):
-if hasattr(self, 'as_py'):
-return repr(self.as_py())
-else:
-return super(Scalar, self).__repr__()
+return ''.format(
+self.__class__.__name__, self.as_py()
+)
 
 def __str__(self):
-if hasattr(self, 'as_py'):
-return str(self.as_py())
-else:
-return super(Scalar, self).__str__()
+return str(self.as_py())
+
+def equals(self, Scalar other):
+return self.wrapped.get().Equals(other.unwrap().get()[0])

Review comment:
   Removed the options `.as_py()` conversion from the equality check, so 
now a scalar can be equal to another scalar only.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-01 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r448559330



##
File path: python/pyarrow/tests/test_convert_builtin.py
##
@@ -968,25 +968,31 @@ def test_sequence_timestamp_from_int_with_unit():
 arr_s = pa.array(data, type=s)
 assert len(arr_s) == 1
 assert arr_s.type == s
-assert repr(arr_s[0]) == "datetime.datetime(1970, 1, 1, 0, 0, 1)"
+assert repr(arr_s[0].as_py()) == "datetime.datetime(1970, 1, 1, 0, 0, 1)"

Review comment:
   Updated.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-01 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r448556316



##
File path: python/pyarrow/scalar.pxi
##
@@ -16,1198 +16,745 @@
 # under the License.
 
 
-_NULL = NA = None
+import collections
 
 
 cdef class Scalar:
 """
-The base class for all array elements.
+The base class for scalars.
 """
 
+def __init__(self):
+raise TypeError("Do not call {}'s constructor directly, use "
+"pa.scalar() instead.".format(self.__class__.__name__))
 
-cdef class NullType(Scalar):
-"""
-Singleton for null array elements.
-"""
-# TODO rename this NullValue?
+cdef void init(self, const shared_ptr[CScalar]& wrapped):
+self.wrapped = wrapped
 
-def __cinit__(self):
-global NA
-if NA is not None:
-raise Exception('Cannot create multiple NAType instances')
+@staticmethod
+cdef wrap(const shared_ptr[CScalar]& wrapped):
+cdef:
+Scalar self
+Type type_id = wrapped.get().type.get().id()
+
+if type_id == _Type_NA:
+return _NULL
+
+typ = _scalar_classes[type_id]
+self = typ.__new__(typ)
+self.init(wrapped)
+
+return self
+
+cdef inline shared_ptr[CScalar] unwrap(self) nogil:
+return self.wrapped
+
+@property
+def type(self):
+return pyarrow_wrap_data_type(self.wrapped.get().type)
 
-self.type = null()
+@property
+def is_valid(self):
+return self.wrapped.get().is_valid
 
 def __repr__(self):
-return 'NULL'
+return ''.format(
+self.__class__.__name__, self.as_py()
+)
 
-def as_py(self):
-"""
-Return None
-"""
-return None
+def __str__(self):
+return str(self.as_py())
+
+def equals(self, Scalar other):
+return self.wrapped.get().Equals(other.unwrap().get()[0])
 
 def __eq__(self, other):
-return NA
+try:
+if not isinstance(other, Scalar):
+other = scalar(other, type=self.type)
+return self.equals(other)
+except (TypeError, ValueError, ArrowInvalid):
+return NotImplemented
+
+def __hash__(self):
+cdef CScalarHash hasher
+return hasher(self.wrapped)
+
+def as_py(self):
+raise NotImplementedError()
 
 
-_NULL = NA = NullType()
+_NULL = NA = None
 
 
-cdef class ArrayValue(Scalar):
+cdef class NullScalar(Scalar):
 """
-The base class for non-null array elements.
+Concrete class for null scalars.
 """
 
-def __init__(self):
-raise TypeError("Do not call {}'s constructor directly, use array "
-"subscription instead."
-.format(self.__class__.__name__))
+def __cinit__(self):
+global NA
+if NA is not None:
+raise Exception('Cannot create multiple NAType instances')
+self.init(shared_ptr[CScalar](new CNullScalar()))
 
-cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-   int64_t index):
-self.type = type
-self.index = index
-self._set_array(sp_array)
+def __init__(self):
+pass
 
-cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-self.sp_array = sp_array
+def __eq__(self, other):
+return NA
 
-def __repr__(self):
-if hasattr(self, 'as_py'):
-return repr(self.as_py())
-else:
-return super(Scalar, self).__repr__()
+def __hash__(self):
+cdef CScalarHash hasher
+return hasher(self.wrapped)
 
-def __str__(self):
-if hasattr(self, 'as_py'):
-return str(self.as_py())
-else:
-return super(Scalar, self).__str__()
+def as_py(self):
+"""
+Return this value as a Python None.
+"""
+return None
 
-def __eq__(self, other):
-if hasattr(self, 'as_py'):
-if isinstance(other, ArrayValue):
-other = other.as_py()
-return self.as_py() == other
-else:
-raise NotImplementedError(
-"Cannot compare Arrow values that don't support as_py()")
 
-def __hash__(self):
-return hash(self.as_py())
+_NULL = NA = NullScalar()
 
 
-cdef class BooleanValue(ArrayValue):
+cdef class BooleanScalar(Scalar):
 """
-Concrete class for boolean array elements.
+Concrete class for boolean scalars.
 """
 
 def as_py(self):
 """
 Return this value as a Python bool.
 """
-cdef CBooleanArray* ap =  self.sp_array.get()
-return ap.Value(self.index)
+cdef CBooleanScalar* sp =  self.wrapped.get()
+return sp.value if sp.is_valid else None
 
 
-cdef class Int8Value(ArrayValue):
+cdef class UInt8Scalar(Scalar):
 """
-   

[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-01 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r448551482



##
File path: python/pyarrow/scalar.pxi
##
@@ -16,1198 +16,745 @@
 # under the License.
 
 
-_NULL = NA = None
+import collections
 
 
 cdef class Scalar:
 """
-The base class for all array elements.
+The base class for scalars.
 """
 
+def __init__(self):
+raise TypeError("Do not call {}'s constructor directly, use "
+"pa.scalar() instead.".format(self.__class__.__name__))
 
-cdef class NullType(Scalar):
-"""
-Singleton for null array elements.
-"""
-# TODO rename this NullValue?
+cdef void init(self, const shared_ptr[CScalar]& wrapped):
+self.wrapped = wrapped
 
-def __cinit__(self):
-global NA
-if NA is not None:
-raise Exception('Cannot create multiple NAType instances')
+@staticmethod
+cdef wrap(const shared_ptr[CScalar]& wrapped):
+cdef:
+Scalar self
+Type type_id = wrapped.get().type.get().id()
+
+if type_id == _Type_NA:
+return _NULL
+
+typ = _scalar_classes[type_id]
+self = typ.__new__(typ)
+self.init(wrapped)
+
+return self
+
+cdef inline shared_ptr[CScalar] unwrap(self) nogil:
+return self.wrapped
+
+@property
+def type(self):
+return pyarrow_wrap_data_type(self.wrapped.get().type)
 
-self.type = null()
+@property
+def is_valid(self):
+return self.wrapped.get().is_valid
 
 def __repr__(self):
-return 'NULL'
+return ''.format(
+self.__class__.__name__, self.as_py()
+)
 
-def as_py(self):
-"""
-Return None
-"""
-return None
+def __str__(self):
+return str(self.as_py())
+
+def equals(self, Scalar other):
+return self.wrapped.get().Equals(other.unwrap().get()[0])
 
 def __eq__(self, other):
-return NA
+try:
+if not isinstance(other, Scalar):
+other = scalar(other, type=self.type)
+return self.equals(other)
+except (TypeError, ValueError, ArrowInvalid):
+return NotImplemented
+
+def __hash__(self):
+cdef CScalarHash hasher
+return hasher(self.wrapped)
+
+def as_py(self):
+raise NotImplementedError()
 
 
-_NULL = NA = NullType()
+_NULL = NA = None
 
 
-cdef class ArrayValue(Scalar):
+cdef class NullScalar(Scalar):
 """
-The base class for non-null array elements.
+Concrete class for null scalars.
 """
 
-def __init__(self):
-raise TypeError("Do not call {}'s constructor directly, use array "
-"subscription instead."
-.format(self.__class__.__name__))
+def __cinit__(self):
+global NA
+if NA is not None:
+raise Exception('Cannot create multiple NAType instances')
+self.init(shared_ptr[CScalar](new CNullScalar()))
 
-cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-   int64_t index):
-self.type = type
-self.index = index
-self._set_array(sp_array)
+def __init__(self):
+pass
 
-cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-self.sp_array = sp_array
+def __eq__(self, other):
+return NA

Review comment:
   Updated to return False.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-01 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r448551033



##
File path: python/pyarrow/tests/test_scalars.py
##
@@ -16,427 +16,443 @@
 # under the License.
 
 import datetime
+import decimal
 import pytest
-import unittest
 
 import numpy as np
 
 import pyarrow as pa
 
 
-class TestScalars(unittest.TestCase):
-
-def test_null_singleton(self):
-with pytest.raises(Exception):
-pa.NAType()
+@pytest.mark.parametrize(['value', 'ty', 'klass', 'deprecated'], [
+(False, None, pa.BooleanScalar, pa.BooleanValue),
+(True, None, pa.BooleanScalar, pa.BooleanValue),
+(1, None, pa.Int64Scalar, pa.Int64Value),
+(-1, None, pa.Int64Scalar, pa.Int64Value),
+(1, pa.int8(), pa.Int8Scalar, pa.Int8Value),
+(1, pa.uint8(), pa.UInt8Scalar, pa.UInt8Value),
+(1, pa.int16(), pa.Int16Scalar, pa.Int16Value),
+(1, pa.uint16(), pa.UInt16Scalar, pa.UInt16Value),
+(1, pa.int32(), pa.Int32Scalar, pa.Int32Value),
+(1, pa.uint32(), pa.UInt32Scalar, pa.UInt32Value),
+(1, pa.int64(), pa.Int64Scalar, pa.Int64Value),
+(1, pa.uint64(), pa.UInt64Scalar, pa.UInt64Value),
+(1.0, None, pa.DoubleScalar, pa.DoubleValue),
+(np.float16(1.0), pa.float16(), pa.HalfFloatScalar, pa.HalfFloatValue),
+(1.0, pa.float32(), pa.FloatScalar, pa.FloatValue),
+("string", None, pa.StringScalar, pa.StringValue),
+(b"bytes", None, pa.BinaryScalar, pa.BinaryValue),
+([1, 2, 3], None, pa.ListScalar, pa.ListValue),
+([1, 2, 3, 4], pa.large_list(pa.int8()), pa.LargeListScalar,
+ pa.LargeListValue),
+(datetime.date.today(), None, pa.Date32Scalar, pa.Date64Value),
+(datetime.datetime.now(), None, pa.TimestampScalar, pa.TimestampValue),
+({'a': 1, 'b': [1, 2]}, None, pa.StructScalar, pa.StructValue)
+])
+def test_basics(value, ty, klass, deprecated):
+s = pa.scalar(value, type=ty)
+assert isinstance(s, klass)
+assert s == value
+assert s == s
+assert s != "else"
+assert hash(s) == hash(s)
+assert s.is_valid is True
+with pytest.warns(FutureWarning):
+isinstance(s, deprecated)
+
+s = pa.scalar(None, type=s.type)
+assert s.is_valid is False
+assert s.as_py() is None
+
+
+def test_null_singleton():
+with pytest.raises(Exception):
+pa.NullScalar()
+
+
+def test_nulls():
+null = pa.scalar(None)
+assert null is pa.NA
+assert null.as_py() is None
+assert (null == "something") is pa.NA

Review comment:
   Added and updated the `__eq__` behavior to return `False`.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-01 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r448551033



##
File path: python/pyarrow/tests/test_scalars.py
##
@@ -16,427 +16,443 @@
 # under the License.
 
 import datetime
+import decimal
 import pytest
-import unittest
 
 import numpy as np
 
 import pyarrow as pa
 
 
-class TestScalars(unittest.TestCase):
-
-def test_null_singleton(self):
-with pytest.raises(Exception):
-pa.NAType()
+@pytest.mark.parametrize(['value', 'ty', 'klass', 'deprecated'], [
+(False, None, pa.BooleanScalar, pa.BooleanValue),
+(True, None, pa.BooleanScalar, pa.BooleanValue),
+(1, None, pa.Int64Scalar, pa.Int64Value),
+(-1, None, pa.Int64Scalar, pa.Int64Value),
+(1, pa.int8(), pa.Int8Scalar, pa.Int8Value),
+(1, pa.uint8(), pa.UInt8Scalar, pa.UInt8Value),
+(1, pa.int16(), pa.Int16Scalar, pa.Int16Value),
+(1, pa.uint16(), pa.UInt16Scalar, pa.UInt16Value),
+(1, pa.int32(), pa.Int32Scalar, pa.Int32Value),
+(1, pa.uint32(), pa.UInt32Scalar, pa.UInt32Value),
+(1, pa.int64(), pa.Int64Scalar, pa.Int64Value),
+(1, pa.uint64(), pa.UInt64Scalar, pa.UInt64Value),
+(1.0, None, pa.DoubleScalar, pa.DoubleValue),
+(np.float16(1.0), pa.float16(), pa.HalfFloatScalar, pa.HalfFloatValue),
+(1.0, pa.float32(), pa.FloatScalar, pa.FloatValue),
+("string", None, pa.StringScalar, pa.StringValue),
+(b"bytes", None, pa.BinaryScalar, pa.BinaryValue),
+([1, 2, 3], None, pa.ListScalar, pa.ListValue),
+([1, 2, 3, 4], pa.large_list(pa.int8()), pa.LargeListScalar,
+ pa.LargeListValue),
+(datetime.date.today(), None, pa.Date32Scalar, pa.Date64Value),
+(datetime.datetime.now(), None, pa.TimestampScalar, pa.TimestampValue),
+({'a': 1, 'b': [1, 2]}, None, pa.StructScalar, pa.StructValue)
+])
+def test_basics(value, ty, klass, deprecated):
+s = pa.scalar(value, type=ty)
+assert isinstance(s, klass)
+assert s == value
+assert s == s
+assert s != "else"
+assert hash(s) == hash(s)
+assert s.is_valid is True
+with pytest.warns(FutureWarning):
+isinstance(s, deprecated)
+
+s = pa.scalar(None, type=s.type)
+assert s.is_valid is False
+assert s.as_py() is None
+
+
+def test_null_singleton():
+with pytest.raises(Exception):
+pa.NullScalar()
+
+
+def test_nulls():
+null = pa.scalar(None)
+assert null is pa.NA
+assert null.as_py() is None
+assert (null == "something") is pa.NA

Review comment:
   Added.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-07-01 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r448550619



##
File path: python/pyarrow/scalar.pxi
##
@@ -16,1198 +16,748 @@
 # under the License.
 
 
-_NULL = NA = None
+import collections
 
 
 cdef class Scalar:
 """
-The base class for all array elements.
-"""
-
-
-cdef class NullType(Scalar):
-"""
-Singleton for null array elements.
+The base class for scalars.
 """
-# TODO rename this NullValue?
-
-def __cinit__(self):
-global NA
-if NA is not None:
-raise Exception('Cannot create multiple NAType instances')
-
-self.type = null()
-
-def __repr__(self):
-return 'NULL'
 
-def as_py(self):
-"""
-Return None
-"""
-return None
+def __init__(self):
+raise TypeError("Do not call {}'s constructor directly, use "
+"pa.scalar() instead.".format(self.__class__.__name__))
 
-def __eq__(self, other):
-return NA
+cdef void init(self, const shared_ptr[CScalar]& wrapped):
+self.wrapped = wrapped
 
+@staticmethod
+cdef wrap(const shared_ptr[CScalar]& wrapped):
+cdef:
+Scalar self
+Type type_id = wrapped.get().type.get().id()
 
-_NULL = NA = NullType()
+if type_id == _Type_NA:
+return _NULL
 
+typ = _scalar_classes[type_id]
+self = typ.__new__(typ)
+self.init(wrapped)
 
-cdef class ArrayValue(Scalar):
-"""
-The base class for non-null array elements.
-"""
+return self
 
-def __init__(self):
-raise TypeError("Do not call {}'s constructor directly, use array "
-"subscription instead."
-.format(self.__class__.__name__))
+cdef inline shared_ptr[CScalar] unwrap(self) nogil:
+return self.wrapped
 
-cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-   int64_t index):
-self.type = type
-self.index = index
-self._set_array(sp_array)
+@property
+def type(self):
+"""
+Data type of the Scalar object.
+"""
+return pyarrow_wrap_data_type(self.wrapped.get().type)
 
-cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-self.sp_array = sp_array
+@property
+def is_valid(self):
+"""
+Holds a valid (non-null) value.
+"""
+return self.wrapped.get().is_valid
 
 def __repr__(self):
-if hasattr(self, 'as_py'):
-return repr(self.as_py())
-else:
-return super(Scalar, self).__repr__()
+return ''.format(
+self.__class__.__name__, self.as_py()
+)
 
 def __str__(self):
-if hasattr(self, 'as_py'):
-return str(self.as_py())
-else:
-return super(Scalar, self).__str__()
+return str(self.as_py())
+
+def equals(self, Scalar other):
+return self.wrapped.get().Equals(other.unwrap().get()[0])
 
 def __eq__(self, other):
-if hasattr(self, 'as_py'):
-if isinstance(other, ArrayValue):
-other = other.as_py()
-return self.as_py() == other
-else:
-raise NotImplementedError(
-"Cannot compare Arrow values that don't support as_py()")
+try:
+if not isinstance(other, Scalar):
+other = scalar(other, type=self.type)
+return self.equals(other)
+except (TypeError, ValueError, ArrowInvalid):
+return NotImplemented
 
 def __hash__(self):
-return hash(self.as_py())
+cdef CScalarHash hasher
+return hasher(self.wrapped)
 
+def as_py(self):
+raise NotImplementedError()
 
-cdef class BooleanValue(ArrayValue):
-"""
-Concrete class for boolean array elements.
-"""
 
-def as_py(self):
-"""
-Return this value as a Python bool.
-"""
-cdef CBooleanArray* ap =  self.sp_array.get()
-return ap.Value(self.index)
+_NULL = NA = None
 
 
-cdef class Int8Value(ArrayValue):
+cdef class NullScalar(Scalar):
 """
-Concrete class for int8 array elements.
+Concrete class for null scalars.
 """
 
-def as_py(self):
-"""
-Return this value as a Python int.
-"""
-cdef CInt8Array* ap =  self.sp_array.get()
-return ap.Value(self.index)
+def __cinit__(self):
+global NA
+if NA is not None:
+raise RuntimeError('Cannot create multiple NullScalar instances')
+self.init(shared_ptr[CScalar](new CNullScalar()))
 
+def __init__(self):
+pass
 
-cdef class UInt8Value(ArrayValue):
-"""
-Concrete class for uint8 array elements.
-"""
+def __eq__(self, other):
+return NA
+
+def __hash__(self):
+   

[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-06-30 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r447758318



##
File path: python/pyarrow/tests/test_scalars.py
##
@@ -16,427 +16,443 @@
 # under the License.
 
 import datetime
+import decimal
 import pytest
-import unittest
 
 import numpy as np
 
 import pyarrow as pa
 
 
-class TestScalars(unittest.TestCase):
-
-def test_null_singleton(self):
-with pytest.raises(Exception):
-pa.NAType()
+@pytest.mark.parametrize(['value', 'ty', 'klass', 'deprecated'], [
+(False, None, pa.BooleanScalar, pa.BooleanValue),
+(True, None, pa.BooleanScalar, pa.BooleanValue),
+(1, None, pa.Int64Scalar, pa.Int64Value),
+(-1, None, pa.Int64Scalar, pa.Int64Value),
+(1, pa.int8(), pa.Int8Scalar, pa.Int8Value),
+(1, pa.uint8(), pa.UInt8Scalar, pa.UInt8Value),
+(1, pa.int16(), pa.Int16Scalar, pa.Int16Value),
+(1, pa.uint16(), pa.UInt16Scalar, pa.UInt16Value),
+(1, pa.int32(), pa.Int32Scalar, pa.Int32Value),
+(1, pa.uint32(), pa.UInt32Scalar, pa.UInt32Value),
+(1, pa.int64(), pa.Int64Scalar, pa.Int64Value),
+(1, pa.uint64(), pa.UInt64Scalar, pa.UInt64Value),
+(1.0, None, pa.DoubleScalar, pa.DoubleValue),
+(np.float16(1.0), pa.float16(), pa.HalfFloatScalar, pa.HalfFloatValue),
+(1.0, pa.float32(), pa.FloatScalar, pa.FloatValue),
+("string", None, pa.StringScalar, pa.StringValue),
+(b"bytes", None, pa.BinaryScalar, pa.BinaryValue),
+([1, 2, 3], None, pa.ListScalar, pa.ListValue),
+([1, 2, 3, 4], pa.large_list(pa.int8()), pa.LargeListScalar,
+ pa.LargeListValue),
+(datetime.date.today(), None, pa.Date32Scalar, pa.Date64Value),
+(datetime.datetime.now(), None, pa.TimestampScalar, pa.TimestampValue),
+({'a': 1, 'b': [1, 2]}, None, pa.StructScalar, pa.StructValue)
+])
+def test_basics(value, ty, klass, deprecated):
+s = pa.scalar(value, type=ty)
+assert isinstance(s, klass)
+assert s == value
+assert s == s
+assert s != "else"
+assert hash(s) == hash(s)

Review comment:
   It tests that different calls of the c++ hashing mechanism produces the 
same hash values.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-06-30 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r447738979



##
File path: python/pyarrow/scalar.pxi
##
@@ -16,1198 +16,745 @@
 # under the License.
 
 
-_NULL = NA = None
+import collections
 
 
 cdef class Scalar:
 """
-The base class for all array elements.
+The base class for scalars.
 """
 
+def __init__(self):
+raise TypeError("Do not call {}'s constructor directly, use "
+"pa.scalar() instead.".format(self.__class__.__name__))
 
-cdef class NullType(Scalar):
-"""
-Singleton for null array elements.
-"""
-# TODO rename this NullValue?
+cdef void init(self, const shared_ptr[CScalar]& wrapped):
+self.wrapped = wrapped
 
-def __cinit__(self):
-global NA
-if NA is not None:
-raise Exception('Cannot create multiple NAType instances')
+@staticmethod
+cdef wrap(const shared_ptr[CScalar]& wrapped):
+cdef:
+Scalar self
+Type type_id = wrapped.get().type.get().id()
+
+if type_id == _Type_NA:
+return _NULL
+
+typ = _scalar_classes[type_id]
+self = typ.__new__(typ)
+self.init(wrapped)
+
+return self
+
+cdef inline shared_ptr[CScalar] unwrap(self) nogil:
+return self.wrapped
+
+@property
+def type(self):
+return pyarrow_wrap_data_type(self.wrapped.get().type)
 
-self.type = null()
+@property
+def is_valid(self):
+return self.wrapped.get().is_valid
 
 def __repr__(self):
-return 'NULL'
+return ''.format(
+self.__class__.__name__, self.as_py()
+)
 
-def as_py(self):
-"""
-Return None
-"""
-return None
+def __str__(self):
+return str(self.as_py())
+
+def equals(self, Scalar other):
+return self.wrapped.get().Equals(other.unwrap().get()[0])
 
 def __eq__(self, other):
-return NA
+try:
+if not isinstance(other, Scalar):
+other = scalar(other, type=self.type)
+return self.equals(other)
+except (TypeError, ValueError, ArrowInvalid):
+return NotImplemented
+
+def __hash__(self):
+cdef CScalarHash hasher
+return hasher(self.wrapped)
+
+def as_py(self):
+raise NotImplementedError()
 
 
-_NULL = NA = NullType()
+_NULL = NA = None
 
 
-cdef class ArrayValue(Scalar):
+cdef class NullScalar(Scalar):
 """
-The base class for non-null array elements.
+Concrete class for null scalars.
 """
 
-def __init__(self):
-raise TypeError("Do not call {}'s constructor directly, use array "
-"subscription instead."
-.format(self.__class__.__name__))
+def __cinit__(self):
+global NA
+if NA is not None:
+raise Exception('Cannot create multiple NAType instances')
+self.init(shared_ptr[CScalar](new CNullScalar()))
 
-cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-   int64_t index):
-self.type = type
-self.index = index
-self._set_array(sp_array)
+def __init__(self):
+pass
 
-cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-self.sp_array = sp_array
+def __eq__(self, other):
+return NA
 
-def __repr__(self):
-if hasattr(self, 'as_py'):
-return repr(self.as_py())
-else:
-return super(Scalar, self).__repr__()
+def __hash__(self):
+cdef CScalarHash hasher
+return hasher(self.wrapped)
 
-def __str__(self):
-if hasattr(self, 'as_py'):
-return str(self.as_py())
-else:
-return super(Scalar, self).__str__()
+def as_py(self):
+"""
+Return this value as a Python None.
+"""
+return None
 
-def __eq__(self, other):
-if hasattr(self, 'as_py'):
-if isinstance(other, ArrayValue):
-other = other.as_py()
-return self.as_py() == other
-else:
-raise NotImplementedError(
-"Cannot compare Arrow values that don't support as_py()")
 
-def __hash__(self):
-return hash(self.as_py())
+_NULL = NA = NullScalar()
 
 
-cdef class BooleanValue(ArrayValue):
+cdef class BooleanScalar(Scalar):
 """
-Concrete class for boolean array elements.
+Concrete class for boolean scalars.
 """
 
 def as_py(self):
 """
 Return this value as a Python bool.
 """
-cdef CBooleanArray* ap =  self.sp_array.get()
-return ap.Value(self.index)
+cdef CBooleanScalar* sp =  self.wrapped.get()
+return sp.value if sp.is_valid else None
 
 
-cdef class Int8Value(ArrayValue):
+cdef class UInt8Scalar(Scalar):
 """
-   

[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-06-30 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r447734944



##
File path: python/pyarrow/scalar.pxi
##
@@ -16,1198 +16,745 @@
 # under the License.
 
 
-_NULL = NA = None
+import collections
 
 
 cdef class Scalar:
 """
-The base class for all array elements.
+The base class for scalars.
 """
 
+def __init__(self):
+raise TypeError("Do not call {}'s constructor directly, use "
+"pa.scalar() instead.".format(self.__class__.__name__))
 
-cdef class NullType(Scalar):
-"""
-Singleton for null array elements.
-"""
-# TODO rename this NullValue?
+cdef void init(self, const shared_ptr[CScalar]& wrapped):
+self.wrapped = wrapped
 
-def __cinit__(self):
-global NA
-if NA is not None:
-raise Exception('Cannot create multiple NAType instances')
+@staticmethod
+cdef wrap(const shared_ptr[CScalar]& wrapped):
+cdef:
+Scalar self
+Type type_id = wrapped.get().type.get().id()
+
+if type_id == _Type_NA:
+return _NULL
+
+typ = _scalar_classes[type_id]
+self = typ.__new__(typ)
+self.init(wrapped)
+
+return self
+
+cdef inline shared_ptr[CScalar] unwrap(self) nogil:
+return self.wrapped
+
+@property
+def type(self):
+return pyarrow_wrap_data_type(self.wrapped.get().type)
 
-self.type = null()
+@property
+def is_valid(self):
+return self.wrapped.get().is_valid
 
 def __repr__(self):
-return 'NULL'
+return ''.format(
+self.__class__.__name__, self.as_py()
+)
 
-def as_py(self):
-"""
-Return None
-"""
-return None
+def __str__(self):
+return str(self.as_py())
+
+def equals(self, Scalar other):
+return self.wrapped.get().Equals(other.unwrap().get()[0])
 
 def __eq__(self, other):
-return NA
+try:
+if not isinstance(other, Scalar):
+other = scalar(other, type=self.type)
+return self.equals(other)
+except (TypeError, ValueError, ArrowInvalid):
+return NotImplemented
+
+def __hash__(self):
+cdef CScalarHash hasher
+return hasher(self.wrapped)
+
+def as_py(self):
+raise NotImplementedError()
 
 
-_NULL = NA = NullType()
+_NULL = NA = None
 
 
-cdef class ArrayValue(Scalar):
+cdef class NullScalar(Scalar):
 """
-The base class for non-null array elements.
+Concrete class for null scalars.
 """
 
-def __init__(self):
-raise TypeError("Do not call {}'s constructor directly, use array "
-"subscription instead."
-.format(self.__class__.__name__))
+def __cinit__(self):
+global NA
+if NA is not None:
+raise Exception('Cannot create multiple NAType instances')
+self.init(shared_ptr[CScalar](new CNullScalar()))
 
-cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-   int64_t index):
-self.type = type
-self.index = index
-self._set_array(sp_array)
+def __init__(self):
+pass
 
-cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-self.sp_array = sp_array
+def __eq__(self, other):
+return NA
 
-def __repr__(self):
-if hasattr(self, 'as_py'):
-return repr(self.as_py())
-else:
-return super(Scalar, self).__repr__()
+def __hash__(self):
+cdef CScalarHash hasher
+return hasher(self.wrapped)
 
-def __str__(self):
-if hasattr(self, 'as_py'):
-return str(self.as_py())
-else:
-return super(Scalar, self).__str__()
+def as_py(self):
+"""
+Return this value as a Python None.
+"""
+return None
 
-def __eq__(self, other):
-if hasattr(self, 'as_py'):
-if isinstance(other, ArrayValue):
-other = other.as_py()
-return self.as_py() == other
-else:
-raise NotImplementedError(
-"Cannot compare Arrow values that don't support as_py()")
 
-def __hash__(self):
-return hash(self.as_py())
+_NULL = NA = NullScalar()
 
 
-cdef class BooleanValue(ArrayValue):
+cdef class BooleanScalar(Scalar):
 """
-Concrete class for boolean array elements.
+Concrete class for boolean scalars.
 """
 
 def as_py(self):
 """
 Return this value as a Python bool.
 """
-cdef CBooleanArray* ap =  self.sp_array.get()
-return ap.Value(self.index)
+cdef CBooleanScalar* sp =  self.wrapped.get()
+return sp.value if sp.is_valid else None
 
 
-cdef class Int8Value(ArrayValue):
+cdef class UInt8Scalar(Scalar):
 """
-   

[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-06-30 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r447733611



##
File path: python/pyarrow/scalar.pxi
##
@@ -16,1198 +16,745 @@
 # under the License.
 
 
-_NULL = NA = None
+import collections
 
 
 cdef class Scalar:
 """
-The base class for all array elements.
+The base class for scalars.
 """
 
+def __init__(self):
+raise TypeError("Do not call {}'s constructor directly, use "
+"pa.scalar() instead.".format(self.__class__.__name__))
 
-cdef class NullType(Scalar):
-"""
-Singleton for null array elements.
-"""
-# TODO rename this NullValue?
+cdef void init(self, const shared_ptr[CScalar]& wrapped):
+self.wrapped = wrapped
 
-def __cinit__(self):
-global NA
-if NA is not None:
-raise Exception('Cannot create multiple NAType instances')
+@staticmethod
+cdef wrap(const shared_ptr[CScalar]& wrapped):
+cdef:
+Scalar self
+Type type_id = wrapped.get().type.get().id()
+
+if type_id == _Type_NA:
+return _NULL
+
+typ = _scalar_classes[type_id]
+self = typ.__new__(typ)
+self.init(wrapped)
+
+return self
+
+cdef inline shared_ptr[CScalar] unwrap(self) nogil:
+return self.wrapped
+
+@property
+def type(self):
+return pyarrow_wrap_data_type(self.wrapped.get().type)
 
-self.type = null()
+@property
+def is_valid(self):
+return self.wrapped.get().is_valid
 
 def __repr__(self):
-return 'NULL'
+return ''.format(
+self.__class__.__name__, self.as_py()
+)
 
-def as_py(self):
-"""
-Return None
-"""
-return None
+def __str__(self):
+return str(self.as_py())
+
+def equals(self, Scalar other):
+return self.wrapped.get().Equals(other.unwrap().get()[0])
 
 def __eq__(self, other):
-return NA
+try:
+if not isinstance(other, Scalar):
+other = scalar(other, type=self.type)
+return self.equals(other)
+except (TypeError, ValueError, ArrowInvalid):
+return NotImplemented
+
+def __hash__(self):
+cdef CScalarHash hasher
+return hasher(self.wrapped)
+
+def as_py(self):
+raise NotImplementedError()
 
 
-_NULL = NA = NullType()
+_NULL = NA = None
 
 
-cdef class ArrayValue(Scalar):
+cdef class NullScalar(Scalar):
 """
-The base class for non-null array elements.
+Concrete class for null scalars.
 """
 
-def __init__(self):
-raise TypeError("Do not call {}'s constructor directly, use array "
-"subscription instead."
-.format(self.__class__.__name__))
+def __cinit__(self):
+global NA
+if NA is not None:
+raise Exception('Cannot create multiple NAType instances')
+self.init(shared_ptr[CScalar](new CNullScalar()))
 
-cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-   int64_t index):
-self.type = type
-self.index = index
-self._set_array(sp_array)
+def __init__(self):
+pass
 
-cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-self.sp_array = sp_array
+def __eq__(self, other):
+return NA
 
-def __repr__(self):
-if hasattr(self, 'as_py'):
-return repr(self.as_py())
-else:
-return super(Scalar, self).__repr__()
+def __hash__(self):

Review comment:
   Once I override `__eq__` the inherited `__hash__` gets removed.

##
File path: python/pyarrow/scalar.pxi
##
@@ -16,1198 +16,745 @@
 # under the License.
 
 
-_NULL = NA = None
+import collections
 
 
 cdef class Scalar:
 """
-The base class for all array elements.
+The base class for scalars.
 """
 
+def __init__(self):
+raise TypeError("Do not call {}'s constructor directly, use "
+"pa.scalar() instead.".format(self.__class__.__name__))
 
-cdef class NullType(Scalar):
-"""
-Singleton for null array elements.
-"""
-# TODO rename this NullValue?
+cdef void init(self, const shared_ptr[CScalar]& wrapped):
+self.wrapped = wrapped
 
-def __cinit__(self):
-global NA
-if NA is not None:
-raise Exception('Cannot create multiple NAType instances')
+@staticmethod
+cdef wrap(const shared_ptr[CScalar]& wrapped):
+cdef:
+Scalar self
+Type type_id = wrapped.get().type.get().id()
+
+if type_id == _Type_NA:
+return _NULL
+
+typ = _scalar_classes[type_id]
+self = typ.__new__(typ)
+self.init(wrapped)
+
+return self
+
+cdef inline shared_ptr[CScalar] unwrap(self) nogil:
+

[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-06-30 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r447731034



##
File path: python/pyarrow/scalar.pxi
##
@@ -16,1198 +16,745 @@
 # under the License.
 
 
-_NULL = NA = None
+import collections
 
 
 cdef class Scalar:
 """
-The base class for all array elements.
+The base class for scalars.
 """
 
+def __init__(self):
+raise TypeError("Do not call {}'s constructor directly, use "
+"pa.scalar() instead.".format(self.__class__.__name__))
 
-cdef class NullType(Scalar):
-"""
-Singleton for null array elements.
-"""
-# TODO rename this NullValue?
+cdef void init(self, const shared_ptr[CScalar]& wrapped):
+self.wrapped = wrapped
 
-def __cinit__(self):
-global NA
-if NA is not None:
-raise Exception('Cannot create multiple NAType instances')
+@staticmethod
+cdef wrap(const shared_ptr[CScalar]& wrapped):
+cdef:
+Scalar self
+Type type_id = wrapped.get().type.get().id()
+
+if type_id == _Type_NA:
+return _NULL
+
+typ = _scalar_classes[type_id]
+self = typ.__new__(typ)
+self.init(wrapped)
+
+return self
+
+cdef inline shared_ptr[CScalar] unwrap(self) nogil:
+return self.wrapped
+
+@property
+def type(self):
+return pyarrow_wrap_data_type(self.wrapped.get().type)
 
-self.type = null()
+@property
+def is_valid(self):
+return self.wrapped.get().is_valid
 
 def __repr__(self):
-return 'NULL'
+return ''.format(
+self.__class__.__name__, self.as_py()
+)
 
-def as_py(self):
-"""
-Return None
-"""
-return None
+def __str__(self):
+return str(self.as_py())
+
+def equals(self, Scalar other):
+return self.wrapped.get().Equals(other.unwrap().get()[0])
 
 def __eq__(self, other):
-return NA
+try:
+if not isinstance(other, Scalar):
+other = scalar(other, type=self.type)
+return self.equals(other)
+except (TypeError, ValueError, ArrowInvalid):
+return NotImplemented
+
+def __hash__(self):
+cdef CScalarHash hasher
+return hasher(self.wrapped)
+
+def as_py(self):
+raise NotImplementedError()
 
 
-_NULL = NA = NullType()
+_NULL = NA = None
 
 
-cdef class ArrayValue(Scalar):
+cdef class NullScalar(Scalar):
 """
-The base class for non-null array elements.
+Concrete class for null scalars.
 """
 
-def __init__(self):
-raise TypeError("Do not call {}'s constructor directly, use array "
-"subscription instead."
-.format(self.__class__.__name__))
+def __cinit__(self):
+global NA
+if NA is not None:
+raise Exception('Cannot create multiple NAType instances')
+self.init(shared_ptr[CScalar](new CNullScalar()))
 
-cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-   int64_t index):
-self.type = type
-self.index = index
-self._set_array(sp_array)
+def __init__(self):
+pass
 
-cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-self.sp_array = sp_array
+def __eq__(self, other):
+return NA

Review comment:
   Well, it was the behavior before the refactor. Shall we check proper 
equality with NullScalar instead?





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-06-30 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r447725217



##
File path: python/pyarrow/includes/libarrow.pxd
##
@@ -44,6 +44,11 @@ cdef extern from "arrow/util/key_value_metadata.h" namespace 
"arrow" nogil:
 c_bool Contains(const c_string& key) const
 
 
+cdef extern from "arrow/util/decimal.h" namespace "arrow" nogil:
+cdef cppclass CDecimal128" arrow::Decimal128":
+c_string ToString(int32_t scale) const

Review comment:
   Created jira https://issues.apache.org/jira/browse/ARROW-9279





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-06-26 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r446160460



##
File path: python/pyarrow/scalar.pxi
##
@@ -16,1198 +16,704 @@
 # under the License.
 
 
-_NULL = NA = None
-
-
 cdef class Scalar:
 """
-The base class for all array elements.
+The base class for scalars.
 """
 
+def __init__(self):
+raise TypeError("Do not call {}'s constructor directly, use "
+"pa.scalar() instead.".format(self.__class__.__name__))
 
-cdef class NullType(Scalar):
-"""
-Singleton for null array elements.
-"""
-# TODO rename this NullValue?
+cdef void init(self, const shared_ptr[CScalar]& wrapped):
+self.wrapped = wrapped
 
-def __cinit__(self):
-global NA
-if NA is not None:
-raise Exception('Cannot create multiple NAType instances')
+@staticmethod
+cdef wrap(const shared_ptr[CScalar]& wrapped):
+cdef:
+Scalar self
+Type type_id = wrapped.get().type.get().id()
+
+if type_id == _Type_NA:
+return _NULL
+
+typ = _scalar_classes[type_id]
+self = typ.__new__(typ)
+self.init(wrapped)
+
+return self
+
+cdef inline shared_ptr[CScalar] unwrap(self) nogil:
+return self.wrapped
 
-self.type = null()
+@property
+def type(self):
+return pyarrow_wrap_data_type(self.wrapped.get().type)
 
 def __repr__(self):
-return 'NULL'
+return ''.format(
+self.__class__.__name__, self.as_py()
+)
 
-def as_py(self):
-"""
-Return None
-"""
-return None
+def __str__(self):
+return str(self.as_py())
 
 def __eq__(self, other):
-return NA
+# TODO(kszucs): use c++ Equals
+if isinstance(other, Scalar):
+other = other.as_py()
+return self.as_py() == other
 
+def __hash__(self):
+# TODO(kszucs): use C++ hash if implemented for the type
+return hash(self.as_py())
+
+def as_py(self):
+raise NotImplementedError()
 
-_NULL = NA = NullType()
+
+_NULL = NA = None
 
 
-cdef class ArrayValue(Scalar):
+cdef class NullScalar(Scalar):
 """
-The base class for non-null array elements.
+Concrete class for null scalars.
 """
 
-def __init__(self):
-raise TypeError("Do not call {}'s constructor directly, use array "
-"subscription instead."
-.format(self.__class__.__name__))
-
-cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-   int64_t index):
-self.type = type
-self.index = index
-self._set_array(sp_array)
+def __cinit__(self):
+global NA
+if NA is not None:
+raise Exception('Cannot create multiple NAType instances')
+self.init(shared_ptr[CScalar](new CNullScalar()))
 
-cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-self.sp_array = sp_array
+def __init__(self):
+pass
 
-def __repr__(self):
-if hasattr(self, 'as_py'):
-return repr(self.as_py())
-else:
-return super(Scalar, self).__repr__()
+def __eq__(self, other):
+return NA
 
-def __str__(self):
-if hasattr(self, 'as_py'):
-return str(self.as_py())
-else:
-return super(Scalar, self).__str__()
+def as_py(self):
+"""
+Return this value as a Python None.
+"""
+return None
 
-def __eq__(self, other):
-if hasattr(self, 'as_py'):
-if isinstance(other, ArrayValue):
-other = other.as_py()
-return self.as_py() == other
-else:
-raise NotImplementedError(
-"Cannot compare Arrow values that don't support as_py()")
 
-def __hash__(self):
-return hash(self.as_py())
+_NULL = NA = NullScalar()
 
 
-cdef class BooleanValue(ArrayValue):
+cdef class BooleanScalar(Scalar):
 """
-Concrete class for boolean array elements.
+Concrete class for boolean scalars.
 """
 
 def as_py(self):
 """
 Return this value as a Python bool.
 """
-cdef CBooleanArray* ap =  self.sp_array.get()
-return ap.Value(self.index)
+cdef CBooleanScalar* sp =  self.wrapped.get()
+return sp.value if sp.is_valid else None
 
 
-cdef class Int8Value(ArrayValue):
+cdef class UInt8Scalar(Scalar):
 """
-Concrete class for int8 array elements.
+Concrete class for uint8 scalars.
 """
 
 def as_py(self):
 """
 Return this value as a Python int.
 """
-cdef CInt8Array* ap =  self.sp_array.get()
-return ap.Value(self.index)
+cdef CUInt8Scalar* sp =  self.wrapped.get()
+return sp.value if sp.is_valid 

[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-06-26 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r446148546



##
File path: python/pyarrow/scalar.pxi
##
@@ -16,1198 +16,704 @@
 # under the License.
 
 
-_NULL = NA = None
-
-
 cdef class Scalar:
 """
-The base class for all array elements.
+The base class for scalars.
 """
 
+def __init__(self):
+raise TypeError("Do not call {}'s constructor directly, use "
+"pa.scalar() instead.".format(self.__class__.__name__))
 
-cdef class NullType(Scalar):
-"""
-Singleton for null array elements.
-"""
-# TODO rename this NullValue?
+cdef void init(self, const shared_ptr[CScalar]& wrapped):
+self.wrapped = wrapped
 
-def __cinit__(self):
-global NA
-if NA is not None:
-raise Exception('Cannot create multiple NAType instances')
+@staticmethod
+cdef wrap(const shared_ptr[CScalar]& wrapped):
+cdef:
+Scalar self
+Type type_id = wrapped.get().type.get().id()
+
+if type_id == _Type_NA:
+return _NULL
+
+typ = _scalar_classes[type_id]
+self = typ.__new__(typ)
+self.init(wrapped)
+
+return self
+
+cdef inline shared_ptr[CScalar] unwrap(self) nogil:
+return self.wrapped
 
-self.type = null()
+@property
+def type(self):
+return pyarrow_wrap_data_type(self.wrapped.get().type)
 
 def __repr__(self):
-return 'NULL'
+return ''.format(
+self.__class__.__name__, self.as_py()
+)
 
-def as_py(self):
-"""
-Return None
-"""
-return None
+def __str__(self):
+return str(self.as_py())
 
 def __eq__(self, other):
-return NA
+# TODO(kszucs): use c++ Equals
+if isinstance(other, Scalar):
+other = other.as_py()
+return self.as_py() == other
 
+def __hash__(self):
+# TODO(kszucs): use C++ hash if implemented for the type
+return hash(self.as_py())
+
+def as_py(self):
+raise NotImplementedError()
 
-_NULL = NA = NullType()
+
+_NULL = NA = None
 
 
-cdef class ArrayValue(Scalar):
+cdef class NullScalar(Scalar):
 """
-The base class for non-null array elements.
+Concrete class for null scalars.
 """
 
-def __init__(self):
-raise TypeError("Do not call {}'s constructor directly, use array "
-"subscription instead."
-.format(self.__class__.__name__))
-
-cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-   int64_t index):
-self.type = type
-self.index = index
-self._set_array(sp_array)
+def __cinit__(self):
+global NA
+if NA is not None:
+raise Exception('Cannot create multiple NAType instances')
+self.init(shared_ptr[CScalar](new CNullScalar()))
 
-cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-self.sp_array = sp_array
+def __init__(self):
+pass
 
-def __repr__(self):
-if hasattr(self, 'as_py'):
-return repr(self.as_py())
-else:
-return super(Scalar, self).__repr__()
+def __eq__(self, other):
+return NA
 
-def __str__(self):
-if hasattr(self, 'as_py'):
-return str(self.as_py())
-else:
-return super(Scalar, self).__str__()
+def as_py(self):
+"""
+Return this value as a Python None.
+"""
+return None
 
-def __eq__(self, other):
-if hasattr(self, 'as_py'):
-if isinstance(other, ArrayValue):
-other = other.as_py()
-return self.as_py() == other
-else:
-raise NotImplementedError(
-"Cannot compare Arrow values that don't support as_py()")
 
-def __hash__(self):
-return hash(self.as_py())
+_NULL = NA = NullScalar()
 
 
-cdef class BooleanValue(ArrayValue):
+cdef class BooleanScalar(Scalar):
 """
-Concrete class for boolean array elements.
+Concrete class for boolean scalars.
 """
 
 def as_py(self):
 """
 Return this value as a Python bool.
 """
-cdef CBooleanArray* ap =  self.sp_array.get()
-return ap.Value(self.index)
+cdef CBooleanScalar* sp =  self.wrapped.get()
+return sp.value if sp.is_valid else None
 
 
-cdef class Int8Value(ArrayValue):
+cdef class UInt8Scalar(Scalar):
 """
-Concrete class for int8 array elements.
+Concrete class for uint8 scalars.
 """
 
 def as_py(self):
 """
 Return this value as a Python int.
 """
-cdef CInt8Array* ap =  self.sp_array.get()
-return ap.Value(self.index)
+cdef CUInt8Scalar* sp =  self.wrapped.get()
+return sp.value if sp.is_valid 

[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-06-26 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r446148105



##
File path: python/pyarrow/tests/test_parquet.py
##
@@ -2028,7 +2028,7 @@ def test_filters_invalid_pred_op(tempdir, 
use_legacy_dataset):
 use_legacy_dataset=use_legacy_dataset)
 assert dataset.read().num_rows == 0
 
-with pytest.raises(ValueError if use_legacy_dataset else TypeError):
+with pytest.raises(ValueError if use_legacy_dataset else pa.ArrowInvalid):
 # dataset API returns TypeError when trying create invalid comparison

Review comment:
   Nice, updated.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-06-26 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r446147913



##
File path: python/pyarrow/_dataset.pyx
##
@@ -216,22 +216,18 @@ cdef class Expression:
 @staticmethod
 def _scalar(value):
 cdef:
-shared_ptr[CScalar] scalar
-
-if value is None:
-scalar.reset(new CNullScalar())
-elif isinstance(value, bool):
-scalar = MakeScalar(value)
-elif isinstance(value, float):
-scalar = MakeScalar(value)
-elif isinstance(value, int):
-scalar = MakeScalar(value)
-elif isinstance(value, (bytes, str)):
-scalar = MakeStringScalar(tobytes(value))

Review comment:
   Removed it.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-06-25 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r445603562



##
File path: python/pyarrow/util.py
##
@@ -41,6 +41,24 @@ def wrapper(*args, **kwargs):
 return wrapper
 
 
+def _deprecate_class(old_name, new_class, next_version,
+ instancecheck=True):
+"""
+Raise warning if a deprecated class is used in an isinstance check.

Review comment:
   The deprecated classes cannot be instantiated, so we don't need to worry 
about it for now - although we can add support for overriding `__init__` for 
later reuse.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-06-25 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r445604627



##
File path: python/pyarrow/tests/test_scalars.py
##
@@ -17,426 +17,395 @@
 
 import datetime
 import pytest
-import unittest
 
 import numpy as np
 
 import pyarrow as pa
 
 
-class TestScalars(unittest.TestCase):
-
-def test_null_singleton(self):
-with pytest.raises(Exception):
-pa.NAType()
+@pytest.mark.parametrize(['value', 'ty', 'klass', 'deprecated'], [
+(None, None, pa.NullScalar, pa.NullType),
+(False, None, pa.BooleanScalar, pa.BooleanValue),
+(True, None, pa.BooleanScalar, pa.BooleanValue),
+(1, None, pa.Int64Scalar, pa.Int64Value),
+(-1, None, pa.Int64Scalar, pa.Int64Value),
+(1, pa.int8(), pa.Int8Scalar, pa.Int8Value),
+(1, pa.uint8(), pa.UInt8Scalar, pa.UInt8Value),
+(1, pa.int16(), pa.Int16Scalar, pa.Int16Value),
+(1, pa.uint16(), pa.UInt16Scalar, pa.UInt16Value),
+(1, pa.int32(), pa.Int32Scalar, pa.Int32Value),
+(1, pa.uint32(), pa.UInt32Scalar, pa.UInt32Value),
+(1, pa.int64(), pa.Int64Scalar, pa.Int64Value),
+(1, pa.uint64(), pa.UInt64Scalar, pa.UInt64Value),
+(1.0, None, pa.DoubleScalar, pa.DoubleValue),
+(np.float16(1.0), pa.float16(), pa.HalfFloatScalar, pa.HalfFloatValue),
+(1.0, pa.float32(), pa.FloatScalar, pa.FloatValue),
+("string", None, pa.StringScalar, pa.StringValue),
+(b"bytes", None, pa.BinaryScalar, pa.BinaryValue),
+([1, 2, 3], None, pa.ListScalar, pa.ListValue),
+([1, 2, 3, 4], pa.large_list(pa.int8()), pa.LargeListScalar,
+ pa.LargeListValue),
+# date
+# time
+])
+def test_type_inference(value, ty, klass, deprecated):
+s = pa.scalar(value, type=ty)
+assert isinstance(s, klass)
+assert s == value
+with pytest.warns(FutureWarning):
+isinstance(s, deprecated)
+
+
+def test_null_singleton():
+with pytest.raises(Exception):
+pa.NullScalar()
+
+
+def test_nulls():
+arr = pa.array([None, None])
+for v in arr:
+assert v is pa.NA
+assert v.as_py() is None
+
+
+def test_null_equality():
+assert (pa.NA == pa.NA) is pa.NA
+assert (pa.NA == 1) is pa.NA

Review comment:
   Great question, I assume it should follow the same semantics as `Null` 
does.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] kszucs commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

2020-06-25 Thread GitBox


kszucs commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r445603562



##
File path: python/pyarrow/util.py
##
@@ -41,6 +41,24 @@ def wrapper(*args, **kwargs):
 return wrapper
 
 
+def _deprecate_class(old_name, new_class, next_version,
+ instancecheck=True):
+"""
+Raise warning if a deprecated class is used in an isinstance check.

Review comment:
   > Really nice!
   > 
   > Could we add a `is_valid` attribute to the python scalar as well? Now the 
only way to check for a null value is to do `.as_py() is None` ?
   Definitely!
   > 
   > Might not be related to the changes in this PR, but reviewing this 
triggered me to test scalar casting:
   > 
   > ```
   > In [2]: s = pa.scalar(pd.Timestamp("2012-01-01")) 
   >...:  
   >...: import pyarrow.compute as pc 
   >...: pc.cast(s, pa.timestamp('ns'))  
   > ../src/arrow/compute/kernels/scalar_cast_temporal.cc:130:  Check failed: 
(batch[0].kind()) == (Datum::ARRAY) 
   > ...
   > Aborted (core dumped)
   > ```
   I think the cast kernel doesn't support scalars yet, on the other hand the 
scalars have custom `CastTo` implementation which we may want to remove in 
favor of `compute.Cast`?
   
   cc @bkietz 
   
   

##
File path: python/pyarrow/util.py
##
@@ -41,6 +41,24 @@ def wrapper(*args, **kwargs):
 return wrapper
 
 
+def _deprecate_class(old_name, new_class, next_version,
+ instancecheck=True):
+"""
+Raise warning if a deprecated class is used in an isinstance check.

Review comment:
   > Really nice!
   > 
   > Could we add a `is_valid` attribute to the python scalar as well? Now the 
only way to check for a null value is to do `.as_py() is None` ?
   
   Definitely!
   
   > 
   > Might not be related to the changes in this PR, but reviewing this 
triggered me to test scalar casting:
   > 
   > ```
   > In [2]: s = pa.scalar(pd.Timestamp("2012-01-01")) 
   >...:  
   >...: import pyarrow.compute as pc 
   >...: pc.cast(s, pa.timestamp('ns'))  
   > ../src/arrow/compute/kernels/scalar_cast_temporal.cc:130:  Check failed: 
(batch[0].kind()) == (Datum::ARRAY) 
   > ...
   > Aborted (core dumped)
   > ```
   I think the cast kernel doesn't support scalars yet, on the other hand the 
scalars have custom `CastTo` implementation which we may want to remove in 
favor of `compute.Cast`?
   
   cc @bkietz 
   
   





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org