pitrou commented on a change in pull request #10862:
URL: https://github.com/apache/arrow/pull/10862#discussion_r682716498



##########
File path: cpp/src/arrow/scalar.cc
##########
@@ -152,10 +154,261 @@ struct ScalarHashImpl {
   size_t hash_;
 };
 
+// Implementation of Scalar::Validate() and Scalar::ValidateFull()
+struct ScalarValidateImpl {
+  const bool full_validation_;
+
+  explicit ScalarValidateImpl(bool full_validation) : 
full_validation_(full_validation) {
+    ::arrow::util::InitializeUTF8();
+  }
+
+  Status Validate(const Scalar& scalar) {
+    if (!scalar.type) {
+      return Status::Invalid("scalar lacks a type");
+    }
+    return VisitScalarInline(scalar, this);
+  }
+
+  Status Visit(const NullScalar& s) {
+    if (s.is_valid) {
+      return Status::Invalid("null scalar should have is_valid = false");
+    }
+    return Status::OK();
+  }
+
+  template <typename T>
+  Status Visit(const internal::PrimitiveScalar<T>& s) {
+    return Status::OK();
+  }
+
+  Status Visit(const BaseBinaryScalar& s) { return ValidateBinaryScalar(s); }
+
+  Status Visit(const StringScalar& s) { return ValidateStringScalar(s); }
+
+  Status Visit(const LargeStringScalar& s) { return ValidateStringScalar(s); }
+
+  Status Visit(const FixedSizeBinaryScalar& s) {
+    RETURN_NOT_OK(ValidateBinaryScalar(s));
+    if (s.is_valid) {
+      const auto& byte_width =
+          checked_cast<const FixedSizeBinaryType&>(*s.type).byte_width();
+      if (s.value->size() != byte_width) {
+        return Status::Invalid(s.type->ToString(), " scalar should have a 
value of size ",
+                               byte_width, ", got ", s.value->size());
+      }
+    }
+    return Status::OK();
+  }
+
+  Status Visit(const Decimal128Scalar& s) {
+    // XXX validate precision?
+    return Status::OK();
+  }
+
+  Status Visit(const Decimal256Scalar& s) {
+    // XXX validate precision?
+    return Status::OK();
+  }
+
+  Status Visit(const BaseListScalar& s) { return ValidateBaseListScalar(s); }
+
+  Status Visit(const FixedSizeListScalar& s) {
+    RETURN_NOT_OK(ValidateBaseListScalar(s));
+    if (s.is_valid) {
+      const auto& list_type = checked_cast<const FixedSizeListType&>(*s.type);
+      if (s.value->length() != list_type.list_size()) {
+        return Status::Invalid(s.type->ToString(),
+                               " scalar should have a child value of length ",
+                               list_type.list_size(), ", got ", 
s.value->length());
+      }
+    }
+    return Status::OK();
+  }
+
+  Status Visit(const StructScalar& s) {
+    if (!s.is_valid) {
+      if (!s.value.empty()) {
+        return Status::Invalid(s.type->ToString(),
+                               " scalar is marked null but has child values");
+      }
+      return Status::OK();
+    }
+    const int num_fields = s.type->num_fields();
+    const auto& fields = s.type->fields();
+    if (fields.size() != s.value.size()) {
+      return Status::Invalid("non-null ", s.type->ToString(), " scalar should 
have ",
+                             num_fields, " child values, got ", 
s.value.size());
+    }
+    for (int i = 0; i < num_fields; ++i) {
+      if (!s.value[i]) {
+        return Status::Invalid("non-null ", s.type->ToString(),
+                               " scalar has missing child value at index ", i);
+      }
+      const auto st = Validate(*s.value[i]);
+      if (!st.ok()) {
+        return st.WithMessage(s.type->ToString(),
+                              " scalar fails validation for child at index ", 
i, ": ",
+                              st.message());
+      }
+      if (!s.value[i]->type->Equals(*fields[i]->type())) {
+        return Status::Invalid(
+            s.type->ToString(), " scalar should have a child value of type ",
+            fields[i]->type()->ToString(), "at index ", i, ", got ", 
s.value[i]->type);
+      }
+    }
+    return Status::OK();
+  }
+
+  Status Visit(const DictionaryScalar& s) {
+    const auto& dict_type = checked_cast<const DictionaryType&>(*s.type);
+    // Validate index
+    if (!s.value.index) {
+      return Status::Invalid(s.type->ToString(), " scalar doesn't have an 
index value");
+    }
+    {
+      const auto st = Validate(*s.value.index);
+      if (!st.ok()) {
+        return st.WithMessage(s.type->ToString(),
+                              " scalar fails validation for index value: ", 
st.message());
+      }
+    }
+    if (!s.value.index->type->Equals(*dict_type.index_type())) {
+      return Status::Invalid(
+          s.type->ToString(), " scalar should have an index value of type ",
+          dict_type.index_type()->ToString(), ", got ", 
s.value.index->type->ToString());
+    }
+    if (s.is_valid != s.value.index->is_valid) {
+      if (s.is_valid) {
+        return Status::Invalid("non-null ", s.type->ToString(),
+                               " scalar has null index value");
+      } else {
+        return Status::Invalid("null ", s.type->ToString(),
+                               " scalar has non-null index value");
+      }
+    }
+
+    // Validate dictionary
+    if (!s.value.dictionary) {
+      return Status::Invalid(s.type->ToString(),
+                             " scalar doesn't have a dictionary value");
+    }
+    {
+      const auto st = full_validation_ ? s.value.dictionary->ValidateFull()
+                                       : s.value.dictionary->Validate();
+      if (!st.ok()) {
+        return st.WithMessage(
+            s.type->ToString(),
+            " scalar fails validation for dictionary value: ", st.message());
+      }
+    }
+    if (!s.value.dictionary->type()->Equals(*dict_type.value_type())) {
+      return Status::Invalid(s.type->ToString(),
+                             " scalar should have a dictionary value of type ",
+                             dict_type.value_type()->ToString(), ", got ",
+                             s.value.dictionary->type()->ToString());
+    }
+
+    // XXX check index is in bounds?
+    return Status::OK();
+  }
+
+  Status Visit(const UnionScalar& s) {
+    RETURN_NOT_OK(ValidateOptionalValue(s));
+    const int type_code = s.type_code;  // avoid 8-bit int types for priting
+    const auto& union_type = checked_cast<const UnionType&>(*s.type);
+    const auto& child_ids = union_type.child_ids();
+    if (type_code < 0 || type_code >= static_cast<int64_t>(child_ids.size()) ||
+        child_ids[type_code] == UnionType::kInvalidChildId) {

Review comment:
       `child_ids` is a mapping of type codes to (physical) child ids. What you 
are thinking about is `type_codes`, which is a mapping of (physical) child ids 
to type codes.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to