emkornfield commented on a change in pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#discussion_r474409559



##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -410,21 +410,66 @@ ::arrow::Result<std::shared_ptr<ArrowType>> 
GetTypeForNode(
   return storage_type;
 }
 
-Status NodeToSchemaField(const Node& node, int16_t max_def_level, int16_t 
max_rep_level,
+struct LevelInfo {
+  int16_t def_level = 0;
+  int16_t rep_level = 0;
+  int16_t repeated_ancestor_def_level = 0;

Review comment:
       Consolidated into one in level_conversions, where a follow-up PR will 
use it as an argument to the levels there.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& 
manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  
primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));

Review comment:
       the latter. added comment.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& 
manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  
primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));

Review comment:
       yes.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& 
manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  
primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));

Review comment:
       yes.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& 
manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  
primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional 
child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated 
field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter 
field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, 
ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool 
field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool 
field
+
+  // Legacy 2-level necoding

Review comment:
       Non-legacy is three level encoding above.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& 
manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  
primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional 
child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated 
field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter 
field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, 
ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));

Review comment:
       Yes.  The recommended standard for list types is 3-level encoding, which 
has an outer group indicating nullability of the "list" field, a repeated 
child group to indicate its repeatedness, and an inner group to indicate 
nullability of elements.  Reference: 
https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#nested-types

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& 
manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  
primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional 
child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated 
field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter 
field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, 
ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool 
field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool 
field
+
+  // Legacy 2-level necoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, 
ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner 
struct field

Review comment:
       yes.  fixed.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& 
manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  
primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional 
child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated 
field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter 
field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, 
ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));

Review comment:
       Correct, ConvertedType is legacy.  This was a copy-paste leftover and 
should be fixed now.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& 
manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  
primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional 
child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated 
field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter 
field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, 
ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool 
field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool 
field
+
+  // Legacy 2-level necoding

Review comment:
       It is "legacy" because it isn't recommended.  I added a pointer to 
https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#nested-types

##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -606,23 +652,27 @@ Status NodeToSchemaField(const Node& node, int16_t 
current_def_level,
     if (node.is_repeated()) {
       // One-level list encoding, e.g.
       // a: repeated int32;
+      int16_t repeated_ancestor_def_level = current_levels.IncrementRepeated();
       out->children.resize(1);
       auto child_field = ::arrow::field(node.name(), type, /*nullable=*/false);
-      RETURN_NOT_OK(PopulateLeaf(column_index, child_field, current_def_level,
-                                 current_rep_level, ctx, out, 
&out->children[0]));
+      RETURN_NOT_OK(PopulateLeaf(column_index, child_field, current_levels, 
ctx, out,
+                                 &out->children[0]));
 
       out->field = ::arrow::field(node.name(), ::arrow::list(child_field),
                                   /*nullable=*/false, 
FieldIdMetadata(node.field_id()));
       // Is this right?

Review comment:
       Probably not, but I found it entertaining.  Removed.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& 
manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  
primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional 
child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated 
field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter 
field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, 
ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool 
field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool 
field
+
+  // Legacy 2-level necoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, 
ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner 
struct field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, 
ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());
+    std::string expected("LIST-annotated groups must not be repeated.");
+    EXPECT_EQ(error.message().substr(0, expected.size()), expected);

Review comment:
       I was originally using testing::HasSubstr, but it had linking issues on 
Windows (I opened a JIRA for this) and I didn't want to spend the time 
investigating.  I think it might be the same with StartsWith.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& 
manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  
primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional 
child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated 
field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter 
field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, 
ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool 
field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool 
field
+
+  // Legacy 2-level necoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, 
ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner 
struct field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, 
ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());

Review comment:
       done.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& 
manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  
primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional 
child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated 
field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter 
field

Review comment:
       updated with a comment.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& 
manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  
primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional 
child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated 
field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter 
field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, 
ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.

Review comment:
       Tried to rephrase.  Def level 2 and def level 3 are both used to 
discriminate states of the same field, `child_list`, which is a nullable list.  
When decoding, `def_level == 2` indicates a non-null but empty list, and 
`def_level == 3` indicates an element present in the list.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& 
manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  
primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional 
child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated 
field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter 
field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, 
ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool 
field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool 
field
+
+  // Legacy 2-level necoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, 
ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner 
struct field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, 
ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());
+    std::string expected("LIST-annotated groups must not be repeated.");
+    EXPECT_EQ(error.message().substr(0, expected.size()), expected);
+  }

Review comment:
       From the spec:
   
   > The outer-most level must be a group annotated with LIST that contains a 
single field named list. The repetition of this level must be either optional 
or required and determines whether the list is nullable.
   
   I read that as meaning it should never be repeated.  (I didn't see any 
exceptions under the 2-level encoding, but maybe we should clarify on 
parquet-dev@?)

##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -410,21 +410,66 @@ ::arrow::Result<std::shared_ptr<ArrowType>> 
GetTypeForNode(
   return storage_type;
 }
 
-Status NodeToSchemaField(const Node& node, int16_t max_def_level, int16_t 
max_rep_level,
+struct LevelInfo {
+  int16_t def_level = 0;
+  int16_t rep_level = 0;
+  int16_t repeated_ancestor_def_level = 0;
+
+  /// Copies current levels to the schema field.
+  void Populate(SchemaField* out) {
+    out->definition_level = def_level;
+    out->repetition_level = rep_level;
+    out->repeated_ancestor_definition_level = repeated_ancestor_def_level;
+  }
+
+  /// Increments levels according to the cardinality of node.
+  void Increment(const Node& node) {
+    if (node.is_repeated()) {
+      IncrementRepeated();
+      return;
+    }
+    if (node.is_optional()) {
+      IncrementOptional();
+      return;
+    }
+  }
+
+  /// Incremetns level for a optional node.
+  void IncrementOptional() { def_level++; }
+
+  /// Increments levels for the repeated node.  Returns
+  /// the previous ancestor_list_def_level.
+  int16_t IncrementRepeated() {
+    int16_t last_repeated_ancestor = repeated_ancestor_def_level;
+
+    // Repeated fields add both a repetition and definition level. This is used
+    // to distinguish between an empty list and a list with an item in it.
+    ++rep_level;
+    ++def_level;
+    // For levels >= current_def_level it indicates the list was

Review comment:
       Should be `repeated_ancestor_def_level`.

##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -554,41 +605,36 @@ Status GroupToSchemaField(const GroupNode& node, int16_t 
current_def_level,
     // repeated group $NAME {
     //   r/o TYPE[0] f0
     //   r/o TYPE[1] f1
-    // }

Review comment:
       Overzealous delete, I think.  Added back.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to