HuaHuaY commented on code in PR #50160:
URL: https://github.com/apache/arrow/pull/50160#discussion_r3419343175
##########
cpp/src/parquet/arrow/writer.cc:
##########
@@ -169,13 +170,24 @@ class ArrowColumnWriterV2 {
leaf_idx, ctx, [&](const MultipathLevelBuilderResult& result) {
size_t visited_component_size =
result.post_list_visited_elements.size();
DCHECK_GT(visited_component_size, 0);
- if (visited_component_size != 1) {
- return Status::NotImplemented(
- "Lists with non-zero length null components are not
supported");
+ std::shared_ptr<Array> values_array;
+ if (visited_component_size == 1) {
+ const ElementRange& range =
result.post_list_visited_elements[0];
+ values_array = result.leaf_array->Slice(range.start,
range.Size());
+ } else {
+ // Multiple leaf ranges can be produced when child values are
+ // skipped, such as null fixed-size-list slots, or when
+ // list-view ranges are non-contiguous. Concatenate the slices
+ // in logical write order.
+ ::arrow::ArrayVector arrays;
+ arrays.reserve(visited_component_size);
+ for (const auto& range : result.post_list_visited_elements) {
+ DCHECK(!range.Empty());
+ arrays.push_back(result.leaf_array->Slice(range.start,
range.Size()));
+ }
+ ARROW_ASSIGN_OR_RAISE(values_array,
+ ::arrow::Concatenate(arrays,
ctx->memory_pool));
Review Comment:
It's difficult to write the data without constructing a new array, because we
don't know the definition-level and repetition-level ranges that correspond to
each segment of the array.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]