bkietz commented on a change in pull request #10871:
URL: https://github.com/apache/arrow/pull/10871#discussion_r685360313



##########
File path: cpp/src/arrow/array/array_test.cc
##########
@@ -407,6 +407,35 @@ TEST_F(TestArray, TestMakeArrayOfNullUnion) {
   }
 }
 
+TEST_F(TestArray, TestValidateNullCount) {
+  Int32Builder builder(pool_);
+  ASSERT_OK(builder.Append(5));
+  ASSERT_OK(builder.Append(42));
+  ASSERT_OK(builder.AppendNull());
+  ASSERT_OK_AND_ASSIGN(auto array, builder.Finish());
+
+  ArrayData* data = array->data().get();
+  data->null_count = -1;

Review comment:
       also here:
   ```suggestion
     data->null_count = kUnknownNullCount;
   ```

##########
File path: cpp/src/arrow/array/validate.cc
##########
@@ -637,6 +638,23 @@ struct ValidateArrayFullImpl {
 
 ARROW_EXPORT
 Status ValidateArrayFull(const ArrayData& data) {
+  if (data.null_count != -1) {
+    int64_t actual_null_count;
+    if (HasValidityBitmap(data.type->id()) && data.buffers[0]) {

Review comment:
       I notice ValidateArray doesn't check for `data.type == nullptr`, is that 
worth adding in this PR?

##########
File path: cpp/src/arrow/array/validate.cc
##########
@@ -637,6 +638,23 @@ struct ValidateArrayFullImpl {
 
 ARROW_EXPORT
 Status ValidateArrayFull(const ArrayData& data) {
+  if (data.null_count != -1) {
+    int64_t actual_null_count;
+    if (HasValidityBitmap(data.type->id()) && data.buffers[0]) {
+      // Do not call GetNullCount() as it would also set the `null_count` 
member
+      actual_null_count =
+          data.length - CountSetBits(data.buffers[0]->data(), data.offset, 
data.length);
+    } else if (data.type->id() == Type::NA) {
+      actual_null_count = data.length;

Review comment:
       This is only enforced by `arrow::NullArray::SetData`; it's possible that 
`data.null_count != data.length` which should be an error for `Type::NA`. Going 
a step further, we might want to just always require that `data.null_count == 
data.length` for `Type::NA` instead of also allowing `kUnknownNullCount`

##########
File path: cpp/src/arrow/array/validate.cc
##########
@@ -637,6 +638,23 @@ struct ValidateArrayFullImpl {
 
 ARROW_EXPORT
 Status ValidateArrayFull(const ArrayData& data) {
+  if (data.null_count != -1) {

Review comment:
       Nit:
   ```suggestion
     if (data.null_count != kUnknownNullCount) {
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to