This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/master by this push:
new c26ff4c ORC-581:[C++] Verify fieldNames size for STRUCT types
c26ff4c is described below
commit c26ff4c351d7c34c4272442a6874703f510282a8
Author: Quanlong Huang <[email protected]>
AuthorDate: Fri Jan 3 10:38:32 2020 +0800
ORC-581:[C++] Verify fieldNames size for STRUCT types
For STRUCT types, fieldNames size can be different than subTypes size in a
corrupt file. We should verify it to avoid crash.
This fixes #465
---
c++/src/Reader.cc | 15 +++++++++++----
c++/test/TestType.cc | 29 +++++++++++++++++++++++++++--
2 files changed, 38 insertions(+), 6 deletions(-)
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index ade2c29..8df6f42 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -1015,10 +1015,11 @@ namespace orc {
}
/**
- * Check that indices in the type tree are valid, so we won't crash
- * when we convert the proto::Types to TypeImpls.
+ * Check that proto Types are valid. Indices in the type tree should be
valid,
+ * so we won't crash when we convert the proto::Types to TypeImpls (ORC-317).
+ * For STRUCT types, fieldName size should match subTypes size (ORC-581).
*/
- void checkProtoTypeIds(const proto::Footer &footer) {
+ void checkProtoTypes(const proto::Footer &footer) {
std::stringstream msg;
int maxId = footer.types_size();
if (maxId <= 0) {
@@ -1026,6 +1027,12 @@ namespace orc {
}
for (int i = 0; i < maxId; ++i) {
const proto::Type& type = footer.types(i);
+ if (type.kind() == proto::Type_Kind_STRUCT
+ && type.subtypes_size() != type.fieldnames_size()) {
+ msg << "Footer is corrupt: STRUCT type " << i << " has " <<
type.subtypes_size()
+ << " subTypes, but has " << type.fieldnames_size() << "
fieldNames";
+ throw ParseError(msg.str());
+ }
for (int j = 0; j < type.subtypes_size(); ++j) {
int subTypeId = static_cast<int>(type.subtypes(j));
if (subTypeId <= i) {
@@ -1077,7 +1084,7 @@ namespace orc {
stream->getName());
}
- checkProtoTypeIds(*footer);
+ checkProtoTypes(*footer);
return REDUNDANT_MOVE(footer);
}
diff --git a/c++/test/TestType.cc b/c++/test/TestType.cc
index 5d61f6f..e70a9ef 100644
--- a/c++/test/TestType.cc
+++ b/c++/test/TestType.cc
@@ -343,7 +343,7 @@ namespace orc {
void expectParseError(const proto::Footer &footer, const char* errMsg) {
try {
- checkProtoTypeIds(footer);
+ checkProtoTypes(footer);
FAIL() << "Should throw ParseError for ill ids";
} catch (ParseError& e) {
EXPECT_EQ(e.what(), std::string(errMsg));
@@ -352,22 +352,26 @@ namespace orc {
}
}
- TEST(TestType, testCheckProtoTypeIds) {
+ TEST(TestType, testCheckProtoTypes) {
proto::Footer footer;
proto::Type rootType;
expectParseError(footer, "Footer is corrupt: no types found");
rootType.set_kind(proto::Type_Kind_STRUCT);
rootType.add_subtypes(1); // add a non existent type id
+ rootType.add_fieldnames("f1");
*(footer.add_types()) = rootType;
expectParseError(footer, "Footer is corrupt: types(1) not exists");
footer.clear_types();
rootType.clear_subtypes();
+ rootType.clear_fieldnames();
proto::Type structType;
structType.set_kind(proto::Type_Kind_STRUCT);
structType.add_subtypes(0); // construct a loop back to root
+ structType.add_fieldnames("root");
rootType.add_subtypes(1);
+ rootType.add_fieldnames("f1");
*(footer.add_types()) = rootType;
*(footer.add_types()) = structType;
expectParseError(footer,
@@ -375,12 +379,14 @@ namespace orc {
footer.clear_types();
rootType.clear_subtypes();
+ rootType.clear_fieldnames();
proto::Type listType;
listType.set_kind(proto::Type_Kind_LIST);
proto::Type mapType;
mapType.set_kind(proto::Type_Kind_MAP);
proto::Type unionType;
unionType.set_kind(proto::Type_Kind_UNION);
+ rootType.add_fieldnames("f1");
rootType.add_subtypes(1); // 0 -> 1
listType.add_subtypes(2); // 1 -> 2
mapType.add_subtypes(3); // 2 -> 3
@@ -394,16 +400,35 @@ namespace orc {
footer.clear_types();
rootType.clear_subtypes();
+ rootType.clear_fieldnames();
proto::Type intType;
intType.set_kind(proto::Type_Kind_INT);
proto::Type strType;
strType.set_kind(proto::Type_Kind_STRING);
rootType.add_subtypes(2);
+ rootType.add_fieldnames("f2");
rootType.add_subtypes(1);
+ rootType.add_fieldnames("f1");
*(footer.add_types()) = rootType;
*(footer.add_types()) = intType;
*(footer.add_types()) = strType;
expectParseError(footer,
"Footer is corrupt: subType(0) >= subType(1) in types(0). (2 >= 1)");
+
+ footer.clear_types();
+ rootType.clear_subtypes();
+ rootType.clear_fieldnames();
+ rootType.set_kind(proto::Type_Kind_STRUCT);
+ rootType.add_subtypes(1);
+ *(footer.add_types()) = rootType;
+ *(footer.add_types()) = intType;
+ expectParseError(footer,
+ "Footer is corrupt: STRUCT type 0 has 1 subTypes, but has 0
fieldNames");
+ // Should pass the check after adding the field name
+ footer.clear_types();
+ rootType.add_fieldnames("f1");
+ *(footer.add_types()) = rootType;
+ *(footer.add_types()) = intType;
+ checkProtoTypes(footer);
}
}