felipecrv commented on code in PR #35345:
URL: https://github.com/apache/arrow/pull/35345#discussion_r1317980968


##########
cpp/src/arrow/array/validate.cc:
##########
@@ -699,55 +713,188 @@ struct ValidateArrayImpl {
     return Status::OK();
   }
 
+ private:
+  /// \pre basic validation has already been performed
+  template <typename offset_type>
+  Status FullyValidateOffsets(int64_t offset_limit) {
+    const auto* offsets = data.GetValues<offset_type>(1);
+    auto prev_offset = offsets[0];
+    if (prev_offset < 0) {
+      return Status::Invalid("Offset invariant failure: array starts at 
negative offset ",
+                             prev_offset);
+    }
+    for (int64_t i = 1; i <= data.length; ++i) {
+      const auto current_offset = offsets[i];
+      if (current_offset < prev_offset) {
+        return Status::Invalid("Offset invariant failure: non-monotonic offset 
at slot ",
+                               i, ": ", current_offset, " < ", prev_offset);
+      }
+      if (current_offset > offset_limit) {
+        return Status::Invalid("Offset invariant failure: offset for slot ", i,
+                               " out of bounds: ", current_offset, " > ", 
offset_limit);
+      }
+      prev_offset = current_offset;
+    }
+    return Status::OK();
+  }
+
+  enum ListViewValidationError {
+    kOk = 0,
+    kOutOfBoundsOffset = 1,
+    kOutOfBoundsSize = 2,
+  };
+
+  /// \pre basic validation has already been performed
+  template <typename offset_type>
+  std::pair<ListViewValidationError, int64_t> DoFullyValidateOffsetsAndSizes(
+      int64_t offset_limit) {
+    const auto* validity = data.GetValues<uint8_t>(0, 0);
+    const auto* offsets = data.GetValues<offset_type>(1);
+    const auto* sizes = data.GetValues<offset_type>(2);
+
+    int64_t slot = 0;
+    if (validity) {
+      internal::BitBlockCounter counter(validity, data.offset, data.length);
+      internal::BitBlockCount block;
+      for (int64_t i = 0; i < data.length; i += block.length) {
+        block = counter.NextWord();
+        if (block.NoneSet()) {
+          continue;
+        }
+        const bool all_set = block.AllSet();
+        for (int j = 0; j < block.length; j++) {
+          slot = i + j;
+          const bool valid = all_set || bit_util::GetBit(validity, data.offset 
+ slot);
+          if (valid) {
+            const auto size = sizes[slot];
+            if (size > 0) {
+              const auto offset = offsets[slot];
+              if (offset < 0 || offset > offset_limit) {
+                return {kOutOfBoundsOffset, slot};
+              }
+              if (offset + size > offset_limit) {

Review Comment:
   Good catch. I can rewrite this as `if (size > offset_limit - offset)`, which 
has no underflow/overflow issues because `0 <= offset <= offset_limit` is 
guaranteed once the conditional above has evaluated to `false`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to