This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/master by this push:
     new c26ff4c  ORC-581:[C++] Verify fieldNames size for STRUCT types
c26ff4c is described below

commit c26ff4c351d7c34c4272442a6874703f510282a8
Author: Quanlong Huang <[email protected]>
AuthorDate: Fri Jan 3 10:38:32 2020 +0800

    ORC-581:[C++] Verify fieldNames size for STRUCT types
    
    For STRUCT types, fieldNames size can be different than subTypes size in a 
corrupt file. We should verify it to avoid crash.
    
    This fixes #465
---
 c++/src/Reader.cc    | 15 +++++++++++----
 c++/test/TestType.cc | 29 +++++++++++++++++++++++++++--
 2 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index ade2c29..8df6f42 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -1015,10 +1015,11 @@ namespace orc {
   }
 
   /**
-   * Check that indices in the type tree are valid, so we won't crash
-   * when we convert the proto::Types to TypeImpls.
+   * Check that proto Types are valid. Indices in the type tree should be 
valid,
+   * so we won't crash when we convert the proto::Types to TypeImpls (ORC-317).
+   * For STRUCT types, fieldName size should match subTypes size (ORC-581).
    */
-  void checkProtoTypeIds(const proto::Footer &footer) {
+  void checkProtoTypes(const proto::Footer &footer) {
     std::stringstream msg;
     int maxId = footer.types_size();
     if (maxId <= 0) {
@@ -1026,6 +1027,12 @@ namespace orc {
     }
     for (int i = 0; i < maxId; ++i) {
       const proto::Type& type = footer.types(i);
+      if (type.kind() == proto::Type_Kind_STRUCT
+         && type.subtypes_size() != type.fieldnames_size()) {
+        msg << "Footer is corrupt: STRUCT type " << i << " has " << 
type.subtypes_size()
+            << " subTypes, but has " << type.fieldnames_size() << " 
fieldNames";
+        throw ParseError(msg.str());
+      }
       for (int j = 0; j < type.subtypes_size(); ++j) {
         int subTypeId = static_cast<int>(type.subtypes(j));
         if (subTypeId <= i) {
@@ -1077,7 +1084,7 @@ namespace orc {
                        stream->getName());
     }
 
-    checkProtoTypeIds(*footer);
+    checkProtoTypes(*footer);
     return REDUNDANT_MOVE(footer);
   }
 
diff --git a/c++/test/TestType.cc b/c++/test/TestType.cc
index 5d61f6f..e70a9ef 100644
--- a/c++/test/TestType.cc
+++ b/c++/test/TestType.cc
@@ -343,7 +343,7 @@ namespace orc {
 
   void expectParseError(const proto::Footer &footer, const char* errMsg) {
     try {
-      checkProtoTypeIds(footer);
+      checkProtoTypes(footer);
       FAIL() << "Should throw ParseError for ill ids";
     } catch (ParseError& e) {
       EXPECT_EQ(e.what(), std::string(errMsg));
@@ -352,22 +352,26 @@ namespace orc {
     }
   }
 
-  TEST(TestType, testCheckProtoTypeIds) {
+  TEST(TestType, testCheckProtoTypes) {
     proto::Footer footer;
     proto::Type rootType;
     expectParseError(footer, "Footer is corrupt: no types found");
 
     rootType.set_kind(proto::Type_Kind_STRUCT);
     rootType.add_subtypes(1); // add a non existent type id
+    rootType.add_fieldnames("f1");
     *(footer.add_types()) = rootType;
     expectParseError(footer, "Footer is corrupt: types(1) not exists");
 
     footer.clear_types();
     rootType.clear_subtypes();
+    rootType.clear_fieldnames();
     proto::Type structType;
     structType.set_kind(proto::Type_Kind_STRUCT);
     structType.add_subtypes(0);  // construct a loop back to root
+    structType.add_fieldnames("root");
     rootType.add_subtypes(1);
+    rootType.add_fieldnames("f1");
     *(footer.add_types()) = rootType;
     *(footer.add_types()) = structType;
     expectParseError(footer,
@@ -375,12 +379,14 @@ namespace orc {
 
     footer.clear_types();
     rootType.clear_subtypes();
+    rootType.clear_fieldnames();
     proto::Type listType;
     listType.set_kind(proto::Type_Kind_LIST);
     proto::Type mapType;
     mapType.set_kind(proto::Type_Kind_MAP);
     proto::Type unionType;
     unionType.set_kind(proto::Type_Kind_UNION);
+    rootType.add_fieldnames("f1");
     rootType.add_subtypes(1);   // 0 -> 1
     listType.add_subtypes(2);   // 1 -> 2
     mapType.add_subtypes(3);    // 2 -> 3
@@ -394,16 +400,35 @@ namespace orc {
 
     footer.clear_types();
     rootType.clear_subtypes();
+    rootType.clear_fieldnames();
     proto::Type intType;
     intType.set_kind(proto::Type_Kind_INT);
     proto::Type strType;
     strType.set_kind(proto::Type_Kind_STRING);
     rootType.add_subtypes(2);
+    rootType.add_fieldnames("f2");
     rootType.add_subtypes(1);
+    rootType.add_fieldnames("f1");
     *(footer.add_types()) = rootType;
     *(footer.add_types()) = intType;
     *(footer.add_types()) = strType;
     expectParseError(footer,
         "Footer is corrupt: subType(0) >= subType(1) in types(0). (2 >= 1)");
+
+    footer.clear_types();
+    rootType.clear_subtypes();
+    rootType.clear_fieldnames();
+    rootType.set_kind(proto::Type_Kind_STRUCT);
+    rootType.add_subtypes(1);
+    *(footer.add_types()) = rootType;
+    *(footer.add_types()) = intType;
+    expectParseError(footer,
+        "Footer is corrupt: STRUCT type 0 has 1 subTypes, but has 0 
fieldNames");
+    // Should pass the check after adding the field name
+    footer.clear_types();
+    rootType.add_fieldnames("f1");
+    *(footer.add_types()) = rootType;
+    *(footer.add_types()) = intType;
+    checkProtoTypes(footer);
   }
 }

Reply via email to