pitrou commented on a change in pull request #8652:
URL: https://github.com/apache/arrow/pull/8652#discussion_r522344042



##########
File path: cpp/src/arrow/array/validate.cc
##########
@@ -38,195 +39,172 @@ namespace internal {
 
 namespace {
 
-struct ValidateArrayVisitor {
-  Status Visit(const NullArray& array) {
-    ARROW_RETURN_IF(array.null_count() != array.length(),
-                    Status::Invalid("null_count is invalid"));
-    return Status::OK();
-  }
+struct ArrayDataValidator {
+  const ArrayData& data;
 
-  Status Visit(const PrimitiveArray& array) {
-    if (array.length() > 0) {
-      if (array.data()->buffers[1] == nullptr) {
-        return Status::Invalid("values buffer is null");
-      }
-      if (array.values() == nullptr) {
-        return Status::Invalid("values is null");
-      }
-    }
-    return Status::OK();
-  }
+  Status Validate() { return ValidateWithType(*data.type); }
+
+  Status ValidateWithType(const DataType& type) { return VisitTypeInline(type, 
this); }
 
-  Status Visit(const Decimal128Array& array) {
-    if (array.length() > 0 && array.values() == nullptr) {
-      return Status::Invalid("values is null");
+  Status Visit(const NullType&) {
+    if (data.null_count != data.length) {
+      return Status::Invalid("Null array null_count unequal to its length");
     }
     return Status::OK();
   }
 
-  Status Visit(const Decimal256Array& array) {
-    if (array.length() > 0 && array.values() == nullptr) {
-      return Status::Invalid("values is null");
+  Status Visit(const FixedWidthType&) {
+    if (data.length > 0) {
+      if (!IsBufferValid(1)) {
+        return Status::Invalid("Missing values buffer in non-empty array");
+      }
     }
     return Status::OK();
   }
 
-  Status Visit(const StringArray& array) { return ValidateBinaryArray(array); }
+  Status Visit(const StringType& type) { return ValidateBinaryLike(type); }
 
-  Status Visit(const BinaryArray& array) { return ValidateBinaryArray(array); }
+  Status Visit(const BinaryType& type) { return ValidateBinaryLike(type); }
 
-  Status Visit(const LargeStringArray& array) { return 
ValidateBinaryArray(array); }
+  Status Visit(const LargeStringType& type) { return ValidateBinaryLike(type); 
}
 
-  Status Visit(const LargeBinaryArray& array) { return 
ValidateBinaryArray(array); }
+  Status Visit(const LargeBinaryType& type) { return ValidateBinaryLike(type); 
}
 
-  Status Visit(const ListArray& array) { return ValidateListArray(array); }
+  Status Visit(const ListType& type) { return ValidateListLike(type); }
 
-  Status Visit(const LargeListArray& array) { return ValidateListArray(array); 
}
+  Status Visit(const LargeListType& type) { return ValidateListLike(type); }
 
-  Status Visit(const MapArray& array) {
-    if (!array.keys()) {
-      return Status::Invalid("keys is null");
-    }
-    return ValidateListArray(array);
-  }
+  Status Visit(const MapType& type) { return ValidateListLike(type); }
 
-  Status Visit(const FixedSizeListArray& array) {
-    const int64_t len = array.length();
-    const int64_t value_size = array.value_length();
-    if (len > 0 && !array.values()) {
-      return Status::Invalid("values is null");
-    }
-    if (value_size < 0) {
-      return Status::Invalid("FixedSizeListArray has negative value size ", 
value_size);
+  Status Visit(const FixedSizeListType& type) {
+    const ArrayData& values = *data.child_data[0];
+    const int64_t list_size = type.list_size();
+    if (list_size < 0) {
+      return Status::Invalid("Fixed size list has negative list size");
     }
+
     int64_t expected_values_length = -1;
-    if (MultiplyWithOverflow(len, value_size, &expected_values_length) ||
-        array.values()->length() != expected_values_length) {
-      return Status::Invalid("Values Length (", array.values()->length(),
-                             ") is not equal to the length (", len,
-                             ") multiplied by the value size (", value_size, 
")");
+    if (MultiplyWithOverflow(data.length, list_size, &expected_values_length) 
||
+        values.length != expected_values_length) {
+      return Status::Invalid("Values length (", values.length,
+                             ") is not equal to the length (", data.length,
+                             ") multiplied by the value size (", list_size, 
")");
+    }
+
+    const Status child_valid = ValidateArray(values);
+    if (!child_valid.ok()) {
+      return Status::Invalid("Fixed size list child array invalid: ",
+                             child_valid.ToString());
     }
 
     return Status::OK();
   }
 
-  Status Visit(const StructArray& array) {
-    const auto& struct_type = checked_cast<const StructType&>(*array.type());
-    // Validate fields
-    for (int i = 0; i < array.num_fields(); ++i) {
-      // array.field() may crash due to an assertion in ArrayData::Slice(),
-      // so check invariants before
-      const auto& field_data = *array.data()->child_data[i];
-      if (field_data.length < array.offset()) {
-        return Status::Invalid("Struct child array #", i,
-                               " has length smaller than struct array offset 
(",
-                               field_data.length, " < ", array.offset(), ")");
-      }
+  Status Visit(const StructType& type) {
+    for (int i = 0; i < type.num_fields(); ++i) {
+      const auto& field_data = *data.child_data[i];
 
-      auto it = array.field(i);
-      if (it->length() != array.length()) {
+      // Validate child first, to catch nonsensical length / offset etc.
+      const Status field_valid = ValidateArray(field_data);
+      if (!field_valid.ok()) {
         return Status::Invalid("Struct child array #", i,
-                               " has length different from struct array (", 
it->length(),
-                               " != ", array.length(), ")");
+                               " invalid: ", field_valid.ToString());
       }
 
-      auto it_type = struct_type.field(i)->type();
-      if (!it->type()->Equals(it_type)) {
+      if (field_data.length < data.length + data.offset) {
         return Status::Invalid("Struct child array #", i,
-                               " does not match type field: ", 
it->type()->ToString(),
-                               " vs ", it_type->ToString());
+                               " has length smaller than struct array (",
+                               field_data.length, " != ", data.length + 
data.offset, ")");

Review comment:
       I'll commit an alternate suggestion.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to