This is an automated email from the ASF dual-hosted git repository.

maplefu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 48d5151b87 GH-45151: [C++][Parquet] Fix Null-dereference READ in parquet::arrow::ListToSchemaField (#45152)
48d5151b87 is described below

commit 48d5151b87f1b8f977344c7ac20cb0810e46f733
Author: mwish <[email protected]>
AuthorDate: Fri Jan 3 23:56:03 2025 +0800

    GH-45151: [C++][Parquet] Fix Null-dereference READ in parquet::arrow::ListToSchemaField (#45152)
    
    
    
    ### Rationale for this change
    
    Fix Null-dereference READ in parquet::arrow::ListToSchemaField
    
    ### What changes are included in this PR?
    
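    Add a check inside parquet::arrow::ListToSchemaField: when the repeated child of a LIST-annotated group is itself a group, verify that it has at least one field before accessing its first field, and return Status::Invalid otherwise.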
    
    ### Are these changes tested?
    
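    Yes. A new case in TestConvertParquetSchema.IllegalParquetNestedSchema converts a nested LIST schema whose inner repeated group has no fields and expects an Invalid status.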
    
    ### Are there any user-facing changes?
    
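    Bugfix: a LIST-annotated group whose repeated child group has no fields is now rejected with Status::Invalid ("LIST-annotated groups must have at least one child.") instead of triggering a null-dereference read.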
    
    * GitHub Issue: #45151
    
    Lead-authored-by: mwish <[email protected]>
    Co-authored-by: mwish <[email protected]>
    Co-authored-by: Gang Wu <[email protected]>
    Signed-off-by: mwish <[email protected]>
---
 cpp/src/parquet/arrow/arrow_schema_test.cc | 21 +++++++++++++++++++++
 cpp/src/parquet/arrow/schema.cc            | 11 ++++++++---
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc 
b/cpp/src/parquet/arrow/arrow_schema_test.cc
index a6e04e5425..535efa0c8e 100644
--- a/cpp/src/parquet/arrow/arrow_schema_test.cc
+++ b/cpp/src/parquet/arrow/arrow_schema_test.cc
@@ -832,6 +832,27 @@ TEST_F(TestConvertParquetSchema, 
IllegalParquetNestedSchema) {
         Invalid, testing::HasSubstr("LIST-annotated groups must not be 
repeated."),
         ConvertSchema(parquet_fields));
   }
+  // List<List<>>: outer list is two-level encoding, inner list is empty.
+  //
+  // optional group my_list (LIST) {
+  //   repeated group array (LIST) {
+  //     repeated group list {
+  //     }
+  //   }
+  // }
+  {
+    auto list = GroupNode::Make("list", Repetition::REPEATED, {});
+    auto array =
+        GroupNode::Make("array", Repetition::REPEATED, {list}, 
ConvertedType::LIST);
+    std::vector<NodePtr> parquet_fields;
+    parquet_fields.push_back(
+        GroupNode::Make("my_list", Repetition::OPTIONAL, {array}, 
ConvertedType::LIST));
+
+    EXPECT_RAISES_WITH_MESSAGE_THAT(
+        Invalid,
+        testing::HasSubstr("LIST-annotated groups must have at least one 
child."),
+        ConvertSchema(parquet_fields));
+  }
 }
 
 Status ArrowSchemaToParquetMetadata(std::shared_ptr<::arrow::Schema>& 
arrow_schema,
diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index c19e2b9e48..d94c73452c 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -676,9 +676,14 @@ Status ListToSchemaField(const GroupNode& group, LevelInfo 
current_levels,
           return Status::Invalid("Group with one repeated child must be 
LIST-annotated.");
         }
         // LIST-annotated group with three-level encoding cannot be repeated.
-        if (repeated_field->is_group() &&
-            !static_cast<const 
GroupNode&>(*repeated_field).field(0)->is_repeated()) {
-          return Status::Invalid("LIST-annotated groups must not be 
repeated.");
+        if (repeated_field->is_group()) {
+          auto& repeated_group_field = static_cast<const 
GroupNode&>(*repeated_field);
+          if (repeated_group_field.field_count() == 0) {
+            return Status::Invalid("LIST-annotated groups must have at least 
one child.");
+          }
+          if (!repeated_group_field.field(0)->is_repeated()) {
+            return Status::Invalid("LIST-annotated groups must not be 
repeated.");
+          }
         }
         RETURN_NOT_OK(
             NodeToSchemaField(*repeated_field, current_levels, ctx, out, 
child_field));

Reply via email to