This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/master by this push:
new a4d6e84 ORC-590: [C++] added check for missing LENGTH stream in
StringDictionaryColumn
a4d6e84 is described below
commit a4d6e845576960f9f55c0a43082a8a6cdecd6338
Author: boroknagyz <[email protected]>
AuthorDate: Sun Jan 19 10:29:57 2020 +0100
ORC-590: [C++] added check for missing LENGTH stream in
StringDictionaryColumn
Added checks for missing streams in StringDictionaryColumnReader.
This fixes #469
---
c++/src/ColumnReader.cc | 19 +++++++++++--------
.../corrupt/missing_length_stream_in_string_dict.orc | Bin 0 -> 1788 bytes
tools/test/TestFileScan.cc | 2 ++
3 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index d77de5c..e01a24b 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -567,15 +567,18 @@ namespace orc {
RleVersion rleVersion = convertRleVersion(stripe.getEncoding(columnId)
.kind());
uint32_t dictSize = stripe.getEncoding(columnId).dictionarysize();
- rle = createRleDecoder(stripe.getStream(columnId,
- proto::Stream_Kind_DATA,
- true),
- false, rleVersion, memoryPool);
+ std::unique_ptr<SeekableInputStream> stream =
+ stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
+ if (stream == nullptr) {
+ throw ParseError("DATA stream not found in StringDictionaryColumn");
+ }
+ rle = createRleDecoder(std::move(stream), false, rleVersion, memoryPool);
+ stream = stripe.getStream(columnId, proto::Stream_Kind_LENGTH, false);
+ if (dictSize > 0 && stream == nullptr) {
+ throw ParseError("LENGTH stream not found in StringDictionaryColumn");
+ }
std::unique_ptr<RleDecoder> lengthDecoder =
- createRleDecoder(stripe.getStream(columnId,
- proto::Stream_Kind_LENGTH,
- false),
- false, rleVersion, memoryPool);
+ createRleDecoder(std::move(stream), false, rleVersion, memoryPool);
dictionary->dictionaryOffset.resize(dictSize + 1);
int64_t* lengthArray = dictionary->dictionaryOffset.data();
lengthDecoder->next(lengthArray + 1, dictSize, nullptr);
diff --git a/examples/corrupt/missing_length_stream_in_string_dict.orc b/examples/corrupt/missing_length_stream_in_string_dict.orc
new file mode 100644
index 0000000..92912b0
Binary files /dev/null and b/examples/corrupt/missing_length_stream_in_string_dict.orc differ
diff --git a/tools/test/TestFileScan.cc b/tools/test/TestFileScan.cc
index 54c044d..8c783d7 100644
--- a/tools/test/TestFileScan.cc
+++ b/tools/test/TestFileScan.cc
@@ -150,4 +150,6 @@ TEST (TestFileScan, testErrorHandling) {
"bad number of ColumnEncodings in StripeFooter: expected=6, actual=0");
checkForError(findExample("corrupt/negative_dict_entry_lengths.orc"),
"Negative dictionary entry length");
+ checkForError(findExample("corrupt/missing_length_stream_in_string_dict.orc"),
+ "LENGTH stream not found in StringDictionaryColumn");
}