This is an automated email from the ASF dual-hosted git repository.

xndai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/master by this push:
     new 840713e  ORC-470: [C++] Fix BooleanRleDecoderImpl::skip() read over end of stream (#366)
840713e is described below

commit 840713e8a1a5fa5d1f76919f53dea55cac8275a5
Author: Gang Wu <[email protected]>
AuthorDate: Mon Feb 25 19:21:12 2019 -0800

    ORC-470: [C++] Fix BooleanRleDecoderImpl::skip() read over end of stream (#366)
    
    Fix #366
---
 c++/src/ByteRLE.cc      |  8 ++++++--
 c++/test/TestByteRle.cc | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/c++/src/ByteRLE.cc b/c++/src/ByteRLE.cc
index 4bf8b7a..5fbd4a5 100644
--- a/c++/src/ByteRLE.cc
+++ b/c++/src/ByteRLE.cc
@@ -537,8 +537,12 @@ namespace orc {
       numValues -= remainingBits;
       uint64_t bytesSkipped = numValues / 8;
       ByteRleDecoderImpl::skip(bytesSkipped);
-      ByteRleDecoderImpl::next(&lastByte, 1, nullptr);
-      remainingBits = 8 - (numValues % 8);
+      if (numValues % 8 != 0) {
+        ByteRleDecoderImpl::next(&lastByte, 1, nullptr);
+        remainingBits = 8 - (numValues % 8);
+      } else {
+        remainingBits = 0;
+      }
     }
   }
 
diff --git a/c++/test/TestByteRle.cc b/c++/test/TestByteRle.cc
index 2c4d4e3..bc19e07 100644
--- a/c++/test/TestByteRle.cc
+++ b/c++/test/TestByteRle.cc
@@ -18,6 +18,9 @@
 
 #include "Adaptor.hh"
 #include "ByteRLE.hh"
+#include "Compression.hh"
+#include "MemoryInputStream.hh"
+#include "MemoryOutputStream.hh"
 #include "OrcTest.hh"
 #include "wrap/gtest-wrapper.h"
 
@@ -1420,4 +1423,43 @@ TEST(BooleanRle, seekBoolAndByteRLE) {
   rle->next(value, 1, nullptr);
   EXPECT_EQ(num[45], value[0]);
   }
+
+  TEST(BooleanRle, seekAndSkipToEnd) {
+    // in total 1024 boolean values which are all true
+    constexpr uint64_t numValues = 1024;
+    char data[numValues];
+    memset(data, 0x01, sizeof(data));
+
+    // create BooleanRleEncoder and encode all 1024 values
+    constexpr uint64_t blockSize = 1024, capacity = 1024 * 1024;
+    MemoryOutputStream memStream(capacity);
+    std::unique_ptr<ByteRleEncoder> encoder = createBooleanRleEncoder
+      (createCompressor(CompressionKind_ZSTD,
+                        &memStream,
+                        CompressionStrategy_COMPRESSION,
+                        capacity,
+                        blockSize,
+                        *getDefaultPool()));
+    encoder->add(data, numValues, nullptr);
+    encoder->flush();
+
+    // create BooleanRleDecoder and prepare decoding
+    std::unique_ptr<ByteRleDecoder> decoder = createBooleanRleDecoder(
+      createDecompressor(CompressionKind_ZSTD,
+                         std::unique_ptr<SeekableInputStream>(
+                           new SeekableArrayInputStream(memStream.getData(),
+                                                        memStream.getLength())),
+                         blockSize,
+                         *getDefaultPool()));
+
+    // before fix of ORC-470, skip all remaining boolean values will get an
+    // exception since BooleanRLEDecoder still tries to read one last byte from
+    // underlying input stream even if the requested number of bits are multiple
+    // of 8 and then it reads over the end of stream.
+    decoder->skip(numValues);
+
+    // as we have reached the end of stream, try to read any data will get exception
+    EXPECT_ANY_THROW(decoder->next(data, 1, nullptr));
+  }
+
 }  // namespace orc

Reply via email to