This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/master by this push:
     new a4d6e84  ORC-590: [C++] added check for missing LENGTH stream in StringDictionaryColumn
a4d6e84 is described below

commit a4d6e845576960f9f55c0a43082a8a6cdecd6338
Author: boroknagyz <[email protected]>
AuthorDate: Sun Jan 19 10:29:57 2020 +0100

    ORC-590: [C++] added check for missing LENGTH stream in StringDictionaryColumn
    
    Added checks for missing streams in StringDictionaryColumnReader.
    
    This fixes #469
---
 c++/src/ColumnReader.cc                              |  19 +++++++++++--------
 .../corrupt/missing_length_stream_in_string_dict.orc | Bin 0 -> 1788 bytes
 tools/test/TestFileScan.cc                           |   2 ++
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index d77de5c..e01a24b 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -567,15 +567,18 @@ namespace orc {
     RleVersion rleVersion = convertRleVersion(stripe.getEncoding(columnId)
                                                 .kind());
     uint32_t dictSize = stripe.getEncoding(columnId).dictionarysize();
-    rle = createRleDecoder(stripe.getStream(columnId,
-                                            proto::Stream_Kind_DATA,
-                                            true),
-                           false, rleVersion, memoryPool);
+    std::unique_ptr<SeekableInputStream> stream =
+        stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
+    if (stream == nullptr) {
+      throw ParseError("DATA stream not found in StringDictionaryColumn");
+    }
+    rle = createRleDecoder(std::move(stream), false, rleVersion, memoryPool);
+    stream = stripe.getStream(columnId, proto::Stream_Kind_LENGTH, false);
+    if (dictSize > 0 && stream == nullptr) {
+      throw ParseError("LENGTH stream not found in StringDictionaryColumn");
+    }
     std::unique_ptr<RleDecoder> lengthDecoder =
-            createRleDecoder(stripe.getStream(columnId,
-                                        proto::Stream_Kind_LENGTH,
-                                        false),
-                       false, rleVersion, memoryPool);
+        createRleDecoder(std::move(stream), false, rleVersion, memoryPool);
     dictionary->dictionaryOffset.resize(dictSize + 1);
     int64_t* lengthArray = dictionary->dictionaryOffset.data();
     lengthDecoder->next(lengthArray + 1, dictSize, nullptr);
diff --git a/examples/corrupt/missing_length_stream_in_string_dict.orc b/examples/corrupt/missing_length_stream_in_string_dict.orc
new file mode 100644
index 0000000..92912b0
Binary files /dev/null and b/examples/corrupt/missing_length_stream_in_string_dict.orc differ
diff --git a/tools/test/TestFileScan.cc b/tools/test/TestFileScan.cc
index 54c044d..8c783d7 100644
--- a/tools/test/TestFileScan.cc
+++ b/tools/test/TestFileScan.cc
@@ -150,4 +150,6 @@ TEST (TestFileScan, testErrorHandling) {
       "bad number of ColumnEncodings in StripeFooter: expected=6, actual=0");
   checkForError(findExample("corrupt/negative_dict_entry_lengths.orc"),
         "Negative dictionary entry length");
+  checkForError(findExample("corrupt/missing_length_stream_in_string_dict.orc"),
+        "LENGTH stream not found in StringDictionaryColumn");
 }

Reply via email to