pitrou commented on code in PR #50160:
URL: https://github.com/apache/arrow/pull/50160#discussion_r3419373685
##########
cpp/src/parquet/arrow/writer.cc:
##########
@@ -169,13 +170,24 @@ class ArrowColumnWriterV2 {
leaf_idx, ctx, [&](const MultipathLevelBuilderResult& result) {
size_t visited_component_size =
result.post_list_visited_elements.size();
DCHECK_GT(visited_component_size, 0);
- if (visited_component_size != 1) {
- return Status::NotImplemented(
- "Lists with non-zero length null components are not
supported");
+ std::shared_ptr<Array> values_array;
+ if (visited_component_size == 1) {
+ const ElementRange& range =
result.post_list_visited_elements[0];
+ values_array = result.leaf_array->Slice(range.start,
range.Size());
+ } else {
+ // Multiple leaf ranges can be produced when child values are
+ // skipped, such as null fixed-size-list slots, or when
+ // list-view ranges are non-contiguous. Concatenate the slices
+ // in logical write order.
+ ::arrow::ArrayVector arrays;
+ arrays.reserve(visited_component_size);
+ for (const auto& range : result.post_list_visited_elements) {
+ DCHECK(!range.Empty());
+ arrays.push_back(result.leaf_array->Slice(range.start,
range.Size()));
+ }
+ ARROW_ASSIGN_OR_RAISE(values_array,
+ ::arrow::Concatenate(arrays,
ctx->memory_pool));
Review Comment:
> Maybe we can add two offsets in `ElementRange` to implement this.
We can, but perhaps that can be left as a later optimization.
(Also, I'm not sure it's a good idea if there are many `ElementRange`s in the
first place.)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]