Repository: orc
Updated Branches:
  refs/heads/master 04d65dc6d -> 9521a9e4f


ORC-403: [C++] Add checks to avoid invalid offsets in InputStream

Fixes #309

Signed-off-by: Owen O'Malley <omal...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/9521a9e4
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/9521a9e4
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/9521a9e4

Branch: refs/heads/master
Commit: 9521a9e4f978ce0192bfacdf6dfd24efe0acf9f0
Parents: 04d65dc
Author: stiga-huang <huangquanl...@gmail.com>
Authored: Sat Sep 15 18:59:18 2018 -0700
Committer: Owen O'Malley <omal...@apache.org>
Committed: Tue Sep 18 13:17:36 2018 -0700

----------------------------------------------------------------------
 c++/src/Reader.cc       | 54 +++++++++++++++++++++++++++++++++++++-------
 c++/src/Reader.hh       |  2 +-
 c++/src/StripeStream.cc | 18 ++++++++++++---
 c++/src/StripeStream.hh |  5 +++-
 4 files changed, 66 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/9521a9e4/c++/src/Reader.cc
----------------------------------------------------------------------
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index 0a3fdcd..4b7977a 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -489,15 +489,24 @@ namespace orc {
     throw std::range_error("key not found");
   }
 
-  void ReaderImpl::getRowIndexStatistics(
-           uint64_t stripeOffset, const proto::StripeFooter& currentStripeFooter,
-           std::vector<std::vector<proto::ColumnStatistics> >* indexStats) const {
+  void ReaderImpl::getRowIndexStatistics(const proto::StripeInformation& stripeInfo,
+      uint64_t stripeIndex, const proto::StripeFooter& currentStripeFooter,
+      std::vector<std::vector<proto::ColumnStatistics> >* indexStats) const {
     int num_streams = currentStripeFooter.streams_size();
-    uint64_t offset = stripeOffset;
+    uint64_t offset = stripeInfo.offset();
+    uint64_t indexEnd = stripeInfo.offset() + stripeInfo.indexlength();
     for (int i = 0; i < num_streams; i++) {
       const proto::Stream& stream = currentStripeFooter.streams(i);
       uint64_t length = static_cast<uint64_t>(stream.length());
       if (static_cast<StreamKind>(stream.kind()) == StreamKind::StreamKind_ROW_INDEX) {
+        if (offset + length > indexEnd) {
+          std::stringstream msg;
+          msg << "Malformed RowIndex stream meta in stripe " << stripeIndex
+              << ": streamOffset=" << offset << ", streamLength=" << length
+              << ", stripeOffset=" << stripeInfo.offset() << ", stripeIndexLength="
+              << stripeInfo.indexlength();
+          throw ParseError(msg.str());
+        }
         std::unique_ptr<SeekableInputStream> pbStream =
           createDecompressor(contents->compression,
                   std::unique_ptr<SeekableInputStream>
@@ -554,7 +563,7 @@ namespace orc {
     proto::StripeFooter currentStripeFooter =
         getStripeFooter(currentStripeInfo, *contents.get());
 
-    getRowIndexStatistics(currentStripeInfo.offset(), currentStripeFooter, &indexStats);
+    getRowIndexStatistics(currentStripeInfo, stripeIndex, currentStripeFooter, &indexStats);
 
     const Timezone& writerTZ =
       currentStripeFooter.has_writertimezone() ?
@@ -586,8 +595,15 @@ namespace orc {
 
   void ReaderImpl::readMetadata() const {
     uint64_t metadataSize = contents->postscript->metadatalength();
-    uint64_t metadataStart = fileLength - metadataSize
-      - contents->postscript->footerlength() - postscriptLength - 1;
+    uint64_t footerLength = contents->postscript->footerlength();
+    if (fileLength < metadataSize + footerLength + postscriptLength + 1) {
+      std::stringstream msg;
+      msg << "Invalid Metadata length: fileLength=" << fileLength
+          << ", metadataLength=" << metadataSize << ", footerLength=" << footerLength
+          << ", postscriptLength=" << postscriptLength;
+      throw ParseError(msg.str());
+    }
+    uint64_t metadataStart = fileLength - metadataSize - footerLength - postscriptLength - 1;
     if (metadataSize != 0) {
       std::unique_ptr<SeekableInputStream> pbStream =
         createDecompressor(contents->compression,
@@ -798,13 +814,24 @@ namespace orc {
   void RowReaderImpl::startNextStripe() {
     reader.reset(); // ColumnReaders use lots of memory; free old memory first
     currentStripeInfo = footer->stripes(static_cast<int>(currentStripe));
+    uint64_t fileLength = contents->stream->getLength();
+    if (currentStripeInfo.offset() + currentStripeInfo.indexlength() +
+        currentStripeInfo.datalength() + currentStripeInfo.footerlength() >= fileLength) {
+      std::stringstream msg;
+      msg << "Malformed StripeInformation at stripe index " << currentStripe << ": fileLength="
+          << fileLength << ", StripeInfo=(offset=" << currentStripeInfo.offset() << ", indexLength="
+          << currentStripeInfo.indexlength() << ", dataLength=" << currentStripeInfo.datalength()
+          << ", footerLength=" << currentStripeInfo.footerlength() << ")";
+      throw ParseError(msg.str());
+    }
     currentStripeFooter = getStripeFooter(currentStripeInfo, *contents.get());
     rowsInCurrentStripe = currentStripeInfo.numberofrows();
     const Timezone& writerTimezone =
       currentStripeFooter.has_writertimezone() ?
         getTimezoneByName(currentStripeFooter.writertimezone()) :
         localTimezone;
-    StripeStreamsImpl stripeStreams(*this, currentStripeFooter,
+    StripeStreamsImpl stripeStreams(*this, currentStripe, currentStripeInfo,
+                                    currentStripeFooter,
                                     currentStripeInfo.offset(),
                                     *(contents->stream.get()),
                                     writerTimezone);
@@ -889,6 +916,12 @@ namespace orc {
 
     std::unique_ptr<proto::PostScript> postscript =
       std::unique_ptr<proto::PostScript>(new proto::PostScript());
+    if (readSize < 1 + postscriptSize) {
+      std::stringstream msg;
+      msg << "Invalid ORC postscript length: " << postscriptSize << ", file length = "
+          << stream->getLength();
+      throw ParseError(msg.str());
+    }
     if (!postscript->ParseFromArray(ptr + readSize - 1 - postscriptSize,
                                    static_cast<int>(postscriptSize))) {
       throw ParseError("Failed to parse the postscript from " +
@@ -997,6 +1030,11 @@ namespace orc {
         buffer.get(), postscriptLength));
       uint64_t footerSize = contents->postscript->footerlength();
       uint64_t tailSize = 1 + postscriptLength + footerSize;
+      if (tailSize >= fileLength) {
+        std::stringstream msg;
+        msg << "Invalid ORC tailSize=" << tailSize << ", fileLength=" << fileLength;
+        throw ParseError(msg.str());
+      }
       uint64_t footerOffset;
 
       if (tailSize > readSize) {

http://git-wip-us.apache.org/repos/asf/orc/blob/9521a9e4/c++/src/Reader.hh
----------------------------------------------------------------------
diff --git a/c++/src/Reader.hh b/c++/src/Reader.hh
index b952fb9..03ef9dd 100644
--- a/c++/src/Reader.hh
+++ b/c++/src/Reader.hh
@@ -190,7 +190,7 @@ namespace orc {
     // internal methods
     void readMetadata() const;
     void checkOrcVersion();
-    void getRowIndexStatistics(uint64_t stripeOffset,
+    void getRowIndexStatistics(const proto::StripeInformation& stripeInfo, uint64_t stripeIndex,
                                const proto::StripeFooter& currentStripeFooter,
                                std::vector<std::vector<proto::ColumnStatistics> >* indexStats) const;
 

http://git-wip-us.apache.org/repos/asf/orc/blob/9521a9e4/c++/src/StripeStream.cc
----------------------------------------------------------------------
diff --git a/c++/src/StripeStream.cc b/c++/src/StripeStream.cc
index 75affa4..b63f19d 100644
--- a/c++/src/StripeStream.cc
+++ b/c++/src/StripeStream.cc
@@ -25,13 +25,16 @@
 
 namespace orc {
 
-  StripeStreamsImpl::StripeStreamsImpl(const RowReaderImpl& _reader,
+  StripeStreamsImpl::StripeStreamsImpl(const RowReaderImpl& _reader, uint64_t _index,
+                                       const proto::StripeInformation& _stripeInfo,
                                        const proto::StripeFooter& _footer,
                                        uint64_t _stripeStart,
                                        InputStream& _input,
                                        const Timezone& _writerTimezone
                                        ): reader(_reader),
+                                          stripeInfo(_stripeInfo),
                                           footer(_footer),
+                                          stripeIndex(_index),
                                           stripeStart(_stripeStart),
                                           input(_input),
                                           writerTimezone(_writerTimezone) {
@@ -77,14 +80,23 @@ namespace orc {
                                proto::Stream_Kind kind,
                                bool shouldStream) const {
     uint64_t offset = stripeStart;
+    uint64_t dataEnd = stripeInfo.offset() + stripeInfo.indexlength() + stripeInfo.datalength();
     MemoryPool *pool = reader.getFileContents().pool;
     for(int i = 0; i < footer.streams_size(); ++i) {
       const proto::Stream& stream = footer.streams(i);
       if (stream.has_kind() &&
           stream.kind() == kind &&
           stream.column() == static_cast<uint64_t>(columnId)) {
-        uint64_t myBlock = shouldStream ? input.getNaturalReadSize():
-          stream.length();
+        uint64_t streamLength = stream.length();
+        uint64_t myBlock = shouldStream ? input.getNaturalReadSize(): streamLength;
+        if (offset + streamLength > dataEnd) {
+          std::stringstream msg;
+          msg << "Malformed stream meta at stream index " << i << " in stripe " << stripeIndex
+              << ": streamOffset=" << offset << ", streamLength=" << streamLength
+              << ", stripeOffset=" << stripeInfo.offset() << ", stripeIndexLength="
+              << stripeInfo.indexlength() << ", stripeDataLength=" << stripeInfo.datalength();
+          throw ParseError(msg.str());
+        }
         return createDecompressor(reader.getCompression(),
                                   std::unique_ptr<SeekableInputStream>
                                   (new SeekableFileInputStream

http://git-wip-us.apache.org/repos/asf/orc/blob/9521a9e4/c++/src/StripeStream.hh
----------------------------------------------------------------------
diff --git a/c++/src/StripeStream.hh b/c++/src/StripeStream.hh
index 7770564..5cbaf60 100644
--- a/c++/src/StripeStream.hh
+++ b/c++/src/StripeStream.hh
@@ -37,13 +37,16 @@ namespace orc {
   class StripeStreamsImpl: public StripeStreams {
   private:
     const RowReaderImpl& reader;
+    const proto::StripeInformation& stripeInfo;
     const proto::StripeFooter& footer;
+    const uint64_t stripeIndex;
     const uint64_t stripeStart;
     InputStream& input;
     const Timezone& writerTimezone;
 
   public:
-    StripeStreamsImpl(const RowReaderImpl& reader,
+    StripeStreamsImpl(const RowReaderImpl& reader, uint64_t index,
+                      const proto::StripeInformation& stripeInfo,
                       const proto::StripeFooter& footer,
                       uint64_t stripeStart,
                       InputStream& input,

Reply via email to