This is an automated email from the ASF dual-hosted git repository.
xndai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/master by this push:
new 840713e ORC-470: [C++] Fix BooleanRleDecoderImpl::skip() read over
end of stream (#366)
840713e is described below
commit 840713e8a1a5fa5d1f76919f53dea55cac8275a5
Author: Gang Wu <[email protected]>
AuthorDate: Mon Feb 25 19:21:12 2019 -0800
ORC-470: [C++] Fix BooleanRleDecoderImpl::skip() read over end of stream
(#366)
Fix #366
---
c++/src/ByteRLE.cc | 8 ++++++--
c++/test/TestByteRle.cc | 42 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 48 insertions(+), 2 deletions(-)
diff --git a/c++/src/ByteRLE.cc b/c++/src/ByteRLE.cc
index 4bf8b7a..5fbd4a5 100644
--- a/c++/src/ByteRLE.cc
+++ b/c++/src/ByteRLE.cc
@@ -537,8 +537,12 @@ namespace orc {
numValues -= remainingBits;
uint64_t bytesSkipped = numValues / 8;
ByteRleDecoderImpl::skip(bytesSkipped);
- ByteRleDecoderImpl::next(&lastByte, 1, nullptr);
- remainingBits = 8 - (numValues % 8);
+ if (numValues % 8 != 0) {
+ ByteRleDecoderImpl::next(&lastByte, 1, nullptr);
+ remainingBits = 8 - (numValues % 8);
+ } else {
+ remainingBits = 0;
+ }
}
}
diff --git a/c++/test/TestByteRle.cc b/c++/test/TestByteRle.cc
index 2c4d4e3..bc19e07 100644
--- a/c++/test/TestByteRle.cc
+++ b/c++/test/TestByteRle.cc
@@ -18,6 +18,9 @@
#include "Adaptor.hh"
#include "ByteRLE.hh"
+#include "Compression.hh"
+#include "MemoryInputStream.hh"
+#include "MemoryOutputStream.hh"
#include "OrcTest.hh"
#include "wrap/gtest-wrapper.h"
@@ -1420,4 +1423,43 @@ TEST(BooleanRle, seekBoolAndByteRLE) {
rle->next(value, 1, nullptr);
EXPECT_EQ(num[45], value[0]);
}
+
+ TEST(BooleanRle, seekAndSkipToEnd) {
+ // in total 1024 boolean values which are all true
+ constexpr uint64_t numValues = 1024;
+ char data[numValues];
+ memset(data, 0x01, sizeof(data));
+
+ // create BooleanRleEncoder and encode all 1024 values
+ constexpr uint64_t blockSize = 1024, capacity = 1024 * 1024;
+ MemoryOutputStream memStream(capacity);
+ std::unique_ptr<ByteRleEncoder> encoder = createBooleanRleEncoder
+ (createCompressor(CompressionKind_ZSTD,
+ &memStream,
+ CompressionStrategy_COMPRESSION,
+ capacity,
+ blockSize,
+ *getDefaultPool()));
+ encoder->add(data, numValues, nullptr);
+ encoder->flush();
+
+ // create BooleanRleDecoder and prepare decoding
+ std::unique_ptr<ByteRleDecoder> decoder = createBooleanRleDecoder(
+ createDecompressor(CompressionKind_ZSTD,
+ std::unique_ptr<SeekableInputStream>(
+ new SeekableArrayInputStream(memStream.getData(),
+ memStream.getLength())),
+ blockSize,
+ *getDefaultPool()));
+
+ // before fix of ORC-470, skip all remaining boolean values will get an
+ // exception since BooleanRLEDecoder still tries to read one last byte from
+ // underlying input stream even if the requested number of bits are
+ // multiple of 8 and then it reads over the end of stream.
+ decoder->skip(numValues);
+
+ // as we have reached the end of stream, try to read any data will get
+ // exception
+ EXPECT_ANY_THROW(decoder->next(data, 1, nullptr));
+ }
+
} // namespace orc