felipecrv commented on code in PR #35345:
URL: https://github.com/apache/arrow/pull/35345#discussion_r1388533669
##########
cpp/src/arrow/array/validate.cc:
##########
@@ -797,57 +811,147 @@ struct ValidateArrayImpl {
return Status::OK();
}
+ private:
+ /// \pre basic validation has already been performed
+ template <typename offset_type>
+ Status FullyValidateOffsets(int64_t offset_limit) {
+ const auto* offsets = data.GetValues<offset_type>(1);
+ auto prev_offset = offsets[0];
+ if (prev_offset < 0) {
+ return Status::Invalid("Offset invariant failure: array starts at
negative offset ",
+ prev_offset);
+ }
+ for (int64_t i = 1; i <= data.length; ++i) {
+ const auto current_offset = offsets[i];
+ if (current_offset < prev_offset) {
+ return Status::Invalid("Offset invariant failure: non-monotonic offset
at slot ",
+ i, ": ", current_offset, " < ", prev_offset);
+ }
+ if (current_offset > offset_limit) {
+ return Status::Invalid("Offset invariant failure: offset for slot ", i,
+ " out of bounds: ", current_offset, " > ",
offset_limit);
+ }
+ prev_offset = current_offset;
+ }
+ return Status::OK();
+ }
+
+ template <typename offset_type>
+ Status OutOfBoundsListViewOffset(int64_t slot, int64_t offset_limit) {
+ const auto* offsets = data.GetValues<offset_type>(1);
+ const auto offset = offsets[slot];
+ return Status::Invalid("Offset invariant failure: offset for slot ", slot,
+ " out of bounds. Expected ", offset,
+ " to be at least 0 and less than ", offset_limit);
+ }
+
+ template <typename offset_type>
+ Status OutOfBoundsListViewSize(int64_t slot, int64_t offset_limit) {
+ const auto* offsets = data.GetValues<offset_type>(1);
+ const auto* sizes = data.GetValues<offset_type>(2);
+ const auto size = sizes[slot];
+ if (size < 0) {
+ return Status::Invalid("Offset invariant failure: size for slot ", slot,
+ " out of bounds: ", size, " < 0");
+ } else {
+ const auto offset = offsets[slot];
+ return Status::Invalid("Offset invariant failure: size for slot ", slot,
+ " out of bounds: ", offset, " + ", size, " > ",
+ offset_limit);
+ }
+ }
+
+ /// \pre basic validation has already been performed
+ template <typename offset_type>
+ Status FullyValidateOffsetsAndSizes(int64_t offset_limit) {
+ const auto* offsets = data.GetValues<offset_type>(1);
+ const auto* sizes = data.GetValues<offset_type>(2);
+
+ for (int64_t i = 0; i < data.length; ++i) {
+ const auto size = sizes[i];
+ if (size >= 0) {
+ const auto offset = offsets[i];
+ if (offset < 0 || offset > offset_limit) {
+ return OutOfBoundsListViewOffset<offset_type>(i, offset_limit);
+ }
+ if (size > offset_limit - offset) {
+ return OutOfBoundsListViewSize<offset_type>(i, offset_limit);
+ }
+ } else {
+ return OutOfBoundsListViewSize<offset_type>(i, offset_limit);
+ }
+ }
+
+ return Status::OK();
+ }
+
template <typename TypeClass>
- Status ValidateOffsets(const TypeClass& type, int64_t offset_limit) {
+ Status ValidateOffsetsAndMaybeSizes(const TypeClass&, int64_t offset_limit) {
using offset_type = typename TypeClass::offset_type;
+ constexpr bool is_list_view = is_list_view_type<TypeClass>::value;
- if (!IsBufferValid(1)) {
- // For length 0, an empty offsets buffer seems accepted as a special case
- // (ARROW-544)
- if (data.length > 0) {
- return Status::Invalid("Non-empty array but offsets are null");
+ const bool non_empty = data.length > 0;
+ if constexpr (is_list_view) {
+ if (!IsBufferValid(1)) {
+ // For length 0, an empty offsets buffer is accepted (ARROW-544).
+ return Status::Invalid("offsets buffer is null");
+ }
+ if (!IsBufferValid(2)) {
+ return Status::Invalid("sizes buffer is null");
+ }
+ } else {
+ if (!IsBufferValid(1)) {
+ // For length 0, an empty offsets buffer is accepted (ARROW-544).
+ return non_empty ? Status::Invalid("Non-empty array but offsets are
null")
+ : Status::OK();
}
- return Status::OK();
}
- // An empty list array can have 0 offsets
const auto offsets_byte_size = data.buffers[1]->size();
const auto required_offsets = ((data.length > 0) || (offsets_byte_size >
0))
- ? data.length + data.offset + 1
+ ? data.length + data.offset +
(is_list_view ? 0 : 1)
: 0;
if (offsets_byte_size / static_cast<int32_t>(sizeof(offset_type)) <
required_offsets) {
return Status::Invalid("Offsets buffer size (bytes): ",
offsets_byte_size,
" isn't large enough for length: ", data.length,
" and offset: ", data.offset);
}
+ if constexpr (is_list_view) {
+ const auto required_sizes = data.length + data.offset;
+ const auto sizes_bytes_size = data.buffers[2]->size();
+ if (sizes_bytes_size / static_cast<int32_t>(sizeof(offset_type)) <
required_sizes) {
+ return Status::Invalid("Sizes buffer size (bytes): ", sizes_bytes_size,
+ " isn't large enough for length: ", data.length,
+ " and offset: ", data.offset);
+ }
+ }
if (full_validation && required_offsets > 0) {
- // Validate all offset values
- const offset_type* offsets = data.GetValues<offset_type>(1);
-
- auto prev_offset = offsets[0];
- if (prev_offset < 0) {
- return Status::Invalid(
- "Offset invariant failure: array starts at negative offset ",
prev_offset);
- }
- for (int64_t i = 1; i <= data.length; ++i) {
- const auto current_offset = offsets[i];
- if (current_offset < prev_offset) {
- return Status::Invalid(
- "Offset invariant failure: non-monotonic offset at slot ", i, ":
",
- current_offset, " < ", prev_offset);
- }
- if (current_offset > offset_limit) {
- return Status::Invalid("Offset invariant failure: offset for slot ",
i,
- " out of bounds: ", current_offset, " > ",
offset_limit);
- }
- prev_offset = current_offset;
+ if constexpr (is_list_view) {
+ return FullyValidateOffsetsAndSizes<offset_type>(offset_limit);
+ } else {
+ return FullyValidateOffsets<offset_type>(offset_limit);
}
}
return Status::OK();
}
+ public:
+ template <typename TypeClass>
+ enable_if_list_view<TypeClass, Status> ValidateOffsetsAndSizes(const
TypeClass& type,
+ int64_t
offset_limit) {
+ return ValidateOffsetsAndMaybeSizes<TypeClass>(type, offset_limit);
+ }
+
+ template <typename TypeClass>
+ std::enable_if_t<is_var_length_list_type<TypeClass>::value ||
+ is_base_binary_like(TypeClass::type_id),
+ Status>
+ ValidateOffsets(const TypeClass& type, int64_t offset_limit) {
+ return ValidateOffsetsAndMaybeSizes<TypeClass>(type, offset_limit);
+ }
Review Comment:
Two reasons for having these public wrappers:
- Provide a simple public interface to the class.
- Constrain the types that can be passed as template parameters, so that all
the internal private functions can assume the passed types are valid.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]