felipecrv commented on code in PR #33641:
URL: https://github.com/apache/arrow/pull/33641#discussion_r1093933196


##########
cpp/src/arrow/array/validate.cc:
##########
@@ -622,6 +637,106 @@ struct ValidateArrayImpl {
     return Status::OK();
   }
 
+  template <typename RunEndsType>
+  Status ValidateRunEndEncoded(const RunEndEncodedType& type) {
+    // overflow was already checked at this point
+    if (data.offset + data.length > std::numeric_limits<RunEndsType>::max()) {
+      return Status::Invalid(
+          "Offset + length of an REE array must fit in a value of the run ends 
type ",
+          *type.run_ends_type(), ", but offset + length was ", data.offset + 
data.length,
+          " while the allowed maximum is ", 
std::numeric_limits<RunEndsType>::max());
+    }
+    if (!data.child_data[0]) {
+      return Status::Invalid("Run ends array is null pointer");
+    }
+    if (!data.child_data[1]) {
+      return Status::Invalid("Values array is null pointer");
+    }
+    const ArrayData& run_ends_data = *data.child_data[0];
+    const ArrayData& values_data = *data.child_data[1];
+    if (*run_ends_data.type != *type.run_ends_type()) {
+      return Status::Invalid("Run ends array of ", type, " must be ",
+                             *type.run_ends_type(), ", but is ", 
*run_ends_data.type);
+    }
+    if (values_data.type != type.encoded_type()) {
+      return Status::Invalid("Parent type says this array encodes ", 
*type.encoded_type(),
+                             " values, but values array has type ", 
*values_data.type);
+    }
+    const Status run_ends_valid = RecurseInto(run_ends_data);
+    if (!run_ends_valid.ok()) {
+      return Status::Invalid("Run ends array invalid: ", 
run_ends_valid.ToString());
+    }
+    const Status values_valid = RecurseInto(values_data);
+    if (!values_valid.ok()) {
+      return Status::Invalid("Values array invalid: ", 
values_valid.ToString());
+    }
+    if (data.null_count != 0) {
+      return Status::Invalid("Null count must be 0 for REE array, but was ",
+                             data.null_count);
+    }
+    if (run_ends_data.null_count != 0) {
+      return Status::Invalid("Null count must be 0 for run ends array, but was 
",
+                             run_ends_data.null_count);
+    }
+    if (!run_ends_data.buffers[1]->is_cpu()) {
+      return Status::NotImplemented("Validating non-CPU run ends buffers");
+    }
+    ArraySpan span(data);
+    const RunEndsType* run_ends = ree_util::RunEnds<RunEndsType>(span);
+    if (run_ends_data.length == 0) {
+      if (data.length == 0) {
+        return Status::OK();
+      } else {
+        return Status::Invalid("REE array has non-zero length ", data.length,
+                               ", but run ends array has zero length");
+      }
+    }
+    if (run_ends[run_ends_data.length - 1] < data.offset + data.length) {
+      return Status::Invalid(
+          "Last run in run ends array ends at ", run_ends[run_ends_data.length 
- 1],
+          " but this array requires at least ", data.offset + data.length, " 
(offset ",
+          data.offset, ", length ", data.length, ")");
+    }
+    if (full_validation && run_ends_data.length != 0) {
+      const int64_t run_ends_length = ree_util::RunEndsArray(span).length;
+      int64_t last_run_end = 0;
+      int64_t physical_offset = 0;
+      int64_t physical_end = 0;
+      for (int64_t index = 0; index < run_ends_length; index++) {
+        int64_t run_end = run_ends[index];
+        if (run_end < 1) {
+          return Status::Invalid(
+              "Run ends array invalid: All run ends must be a positive integer 
but run "
+              "end ",
+              index, " is ", run_end);
+        }
+        if (run_end <= last_run_end) {
+          return Status::Invalid(
+              "Run ends array invalid: Each run end must be greater than the 
prevous "
+              "one, but run end ",
+              index, " is ", run_end, " and run end ", index - 1, " is ", 
last_run_end);
+        }
+        if (run_end > data.offset && last_run_end <= data.offset) {
+          physical_offset = index;
+        }
+        if (run_end >= data.offset + data.length &&
+            last_run_end < data.offset + data.length) {
+          physical_end = index + 1;
+        }

Review Comment:
   I'm removing `physical_offset` and `physical_end` completely, since they 
are calculated only to be compared in the asserts that I'm removing below.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to