This is an automated email from the ASF dual-hosted git repository.
maplefu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 48d5151b87 GH-45151: [C++][Parquet] Fix Null-dereference READ in
parquet::arrow::ListToSchemaField (#45152)
48d5151b87 is described below
commit 48d5151b87f1b8f977344c7ac20cb0810e46f733
Author: mwish <[email protected]>
AuthorDate: Fri Jan 3 23:56:03 2025 +0800
GH-45151: [C++][Parquet] Fix Null-dereference READ in
parquet::arrow::ListToSchemaField (#45152)
### Rationale for this change
Fix Null-dereference READ in parquet::arrow::ListToSchemaField
### What changes are included in this PR?
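Add a check inside parquet::arrow::ListToSchemaField that returns Status::Invalid when the repeated field is a group with no children, instead of accessing its first child unconditionally.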
### Are these changes tested?
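Yes, a new case is added to TestConvertParquetSchema.IllegalParquetNestedSchema in arrow_schema_test.cc.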
### Are there any user-facing changes?
Bugfix
* GitHub Issue: #45151
Lead-authored-by: mwish <[email protected]>
Co-authored-by: mwish <[email protected]>
Co-authored-by: Gang Wu <[email protected]>
Signed-off-by: mwish <[email protected]>
---
cpp/src/parquet/arrow/arrow_schema_test.cc | 21 +++++++++++++++++++++
cpp/src/parquet/arrow/schema.cc | 11 ++++++++---
2 files changed, 29 insertions(+), 3 deletions(-)
diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc
b/cpp/src/parquet/arrow/arrow_schema_test.cc
index a6e04e5425..535efa0c8e 100644
--- a/cpp/src/parquet/arrow/arrow_schema_test.cc
+++ b/cpp/src/parquet/arrow/arrow_schema_test.cc
@@ -832,6 +832,27 @@ TEST_F(TestConvertParquetSchema,
IllegalParquetNestedSchema) {
Invalid, testing::HasSubstr("LIST-annotated groups must not be
repeated."),
ConvertSchema(parquet_fields));
}
+ // List<List<>>: outer list is two-level encoding, inner list is empty.
+ //
+ // optional group my_list (LIST) {
+ // repeated group array (LIST) {
+ // repeated group list {
+ // }
+ // }
+ // }
+ {
+ auto list = GroupNode::Make("list", Repetition::REPEATED, {});
+ auto array =
+ GroupNode::Make("array", Repetition::REPEATED, {list},
ConvertedType::LIST);
+ std::vector<NodePtr> parquet_fields;
+ parquet_fields.push_back(
+ GroupNode::Make("my_list", Repetition::OPTIONAL, {array},
ConvertedType::LIST));
+
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ Invalid,
+ testing::HasSubstr("LIST-annotated groups must have at least one
child."),
+ ConvertSchema(parquet_fields));
+ }
}
Status ArrowSchemaToParquetMetadata(std::shared_ptr<::arrow::Schema>&
arrow_schema,
diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index c19e2b9e48..d94c73452c 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -676,9 +676,14 @@ Status ListToSchemaField(const GroupNode& group, LevelInfo
current_levels,
return Status::Invalid("Group with one repeated child must be
LIST-annotated.");
}
// LIST-annotated group with three-level encoding cannot be repeated.
- if (repeated_field->is_group() &&
- !static_cast<const
GroupNode&>(*repeated_field).field(0)->is_repeated()) {
- return Status::Invalid("LIST-annotated groups must not be
repeated.");
+ if (repeated_field->is_group()) {
+ auto& repeated_group_field = static_cast<const
GroupNode&>(*repeated_field);
+ if (repeated_group_field.field_count() == 0) {
+ return Status::Invalid("LIST-annotated groups must have at least
one child.");
+ }
+ if (!repeated_group_field.field(0)->is_repeated()) {
+ return Status::Invalid("LIST-annotated groups must not be
repeated.");
+ }
}
RETURN_NOT_OK(
NodeToSchemaField(*repeated_field, current_levels, ctx, out,
child_field));