bkietz commented on code in PR #35345:
URL: https://github.com/apache/arrow/pull/35345#discussion_r1319933692
##########
cpp/src/arrow/array/validate.cc:
##########
@@ -699,55 +713,179 @@ struct ValidateArrayImpl {
return Status::OK();
}
+ private:
+ /// \pre basic validation has already been performed
+ template <typename offset_type>
+ Status FullyValidateOffsets(int64_t offset_limit) {
+ const auto* offsets = data.GetValues<offset_type>(1);
+ auto prev_offset = offsets[0];
+ if (prev_offset < 0) {
+ return Status::Invalid("Offset invariant failure: array starts at
negative offset ",
+ prev_offset);
+ }
+ for (int64_t i = 1; i <= data.length; ++i) {
+ const auto current_offset = offsets[i];
+ if (current_offset < prev_offset) {
+ return Status::Invalid("Offset invariant failure: non-monotonic offset
at slot ",
+ i, ": ", current_offset, " < ", prev_offset);
+ }
+ if (current_offset > offset_limit) {
+ return Status::Invalid("Offset invariant failure: offset for slot ", i,
+ " out of bounds: ", current_offset, " > ",
offset_limit);
+ }
+ prev_offset = current_offset;
+ }
+ return Status::OK();
+ }
+
+ enum ListViewValidationError {
+ kOk = 0,
+ kOutOfBoundsOffset = 1,
+ kOutOfBoundsSize = 2,
+ };
Review Comment:
```suggestion
```
##########
cpp/src/arrow/array/concatenate.cc:
##########
@@ -602,8 +691,11 @@ class ConcatenateImpl {
} // namespace
Result<std::shared_ptr<Array>> Concatenate(const ArrayVector& arrays,
MemoryPool* pool) {
- if (arrays.size() == 0) {
- return Status::Invalid("Must pass at least one array");
+ switch (arrays.size()) {
Review Comment:
:ok_hand:
##########
cpp/src/arrow/array/concatenate.cc:
##########
@@ -248,6 +305,38 @@ class ConcatenateImpl {
return ConcatenateImpl(child_data,
pool_).Concatenate(&out_->child_data[0]);
}
+ template <typename T>
+ enable_if_list_view<T, Status> Visit(const T& type) {
+ using offset_type = typename T::offset_type;
+ out_->buffers.resize(3);
+ out_->child_data.resize(1);
+
+ // Calculate the ranges of values that each list-view array uses
+ std::vector<Range> value_ranges;
+ value_ranges.reserve(in_.size());
+ for (const auto& input : in_) {
+ ArraySpan input_span(*input);
+ Range range;
+ ARROW_ASSIGN_OR_RAISE(std::tie(range.offset, range.length),
+
list_util::internal::RangeOfValuesUsed(input_span));
+ value_ranges.push_back(range);
Review Comment:
(Not actually a suggestion for this PR, just speculation about future
helpers) what would you think of adding a macro for `ASSIGN_OR_RAISE` with
structured binding?
```suggestion
ARROW_ASSIGN_OR_RAISE_LIST((offset, length),
list_util::internal::RangeOfValuesUsed(input_span));
value_ranges.push_back({offset, length});
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]