IMPALA-5172: Buffer overrun for Snappy decompression

When using a preallocated buffer for decompression, a
file corruption can lead to the expected decompressed size
being smaller than the actual decompressed size. Since
we use this for allocating the output buffer,
decompression needs to be able to handle a buffer that
is too small.

Snappy does not properly handle a buffer that is too small
and will overrun the buffer. This changes the code to
check the decompressed length and return an error if
the buffer is not large enough. It also adds a test to
verify that this behavior is respected for other
compression algorithms.

Change-Id: I45b75f61e8c0ae85f9add5b13ac2b161a803d2ba
Reviewed-on: http://gerrit.cloudera.org:8080/6625
Reviewed-by: Dan Hecht <[email protected]>
Tested-by: Dan Hecht <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/fcefe47d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/fcefe47d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/fcefe47d

Branch: refs/heads/master
Commit: fcefe47d7348f01e6a6fef700b421290a3f536b3
Parents: 532c5f2
Author: Joe McDonnell <[email protected]>
Authored: Wed Apr 12 16:46:15 2017 -0700
Committer: Dan Hecht <[email protected]>
Committed: Thu Apr 13 23:35:21 2017 +0000

----------------------------------------------------------------------
 be/src/util/decompress-test.cc | 38 +++++++++++++++++++++++++++++++++++++
 be/src/util/decompress.cc      | 22 ++++++++++++++++++---
 2 files changed, 57 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fcefe47d/be/src/util/decompress-test.cc
----------------------------------------------------------------------
diff --git a/be/src/util/decompress-test.cc b/be/src/util/decompress-test.cc
index 48d17c4..7709bb1 100644
--- a/be/src/util/decompress-test.cc
+++ b/be/src/util/decompress-test.cc
@@ -70,6 +70,8 @@ class DecompressorTest : public ::testing::Test {
           sizeof(input_), input_);
       CompressAndDecompressNoOutputAllocated(compressor.get(), 
decompressor.get(),
           0, NULL);
+      DecompressInsufficientOutputBuffer(compressor.get(), decompressor.get(),
+          sizeof(input_), input_);
     } else {
       CompressAndDecompress(compressor.get(), decompressor.get(), 
sizeof(input_),
           input_);
@@ -79,6 +81,8 @@ class DecompressorTest : public ::testing::Test {
         // bzip does not allow NULL input
         CompressAndDecompress(compressor.get(), decompressor.get(), 0, input_);
       }
+      DecompressInsufficientOutputBuffer(compressor.get(), decompressor.get(),
+          sizeof(input_), input_);
     }
 
     compressor->Close();
@@ -140,6 +144,40 @@ class DecompressorTest : public ::testing::Test {
     EXPECT_EQ(memcmp(input, output, input_len), 0);
   }
 
+  // Test the behavior when the decompressor is given too little space to 
produce
+  // the decompressed output. Verify that the decompressor returns an error and
+  // does not overflow the provided buffer.
+  void DecompressInsufficientOutputBuffer(Codec* compressor, Codec* 
decompressor,
+      int64_t input_len, uint8_t* input) {
+    uint8_t* compressed;
+    int64_t compressed_length;
+    bool compress_preallocated = false;
+    int64_t max_compressed_length = compressor->MaxOutputLen(input_len, input);
+
+    if (max_compressed_length > 0) {
+      compressed = mem_pool_.Allocate(max_compressed_length);
+      compressed_length = max_compressed_length;
+      compress_preallocated = true;
+    }
+    EXPECT_OK(compressor->ProcessBlock(compress_preallocated, input_len,
+        input, &compressed_length, &compressed));
+    int64_t output_len = decompressor->MaxOutputLen(compressed_length, 
compressed);
+    if (output_len == -1) output_len = input_len;
+    uint8_t* output = mem_pool_.Allocate(output_len);
+
+    // Check that the decompressor respects the output_len by passing in an
+    // output len that is 4 bytes too small and verifying that those 4 bytes
+    // are not touched. The decompressor should return a non-ok status, as it
+    // does not have space to decompress the full output.
+    output_len = output_len - 4;
+    u_int32_t *canary = (u_int32_t *) &output[output_len];
+    *canary = 0x66aa77bb;
+    Status status = decompressor->ProcessBlock(true, compressed_length, 
compressed,
+                                               &output_len, &output);
+    EXPECT_EQ(*canary, 0x66aa77bb);
+    EXPECT_FALSE(status.ok());
+  }
+
   void Compress(Codec* compressor, int64_t input_len, uint8_t* input,
       int64_t* output_len, uint8_t** output, bool output_preallocated) {
     if (input == NULL && compressor->file_extension() == "bz2") {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fcefe47d/be/src/util/decompress.cc
----------------------------------------------------------------------
diff --git a/be/src/util/decompress.cc b/be/src/util/decompress.cc
index 8897e98..506ddd7 100644
--- a/be/src/util/decompress.cc
+++ b/be/src/util/decompress.cc
@@ -435,7 +435,9 @@ static Status SnappyBlockDecompress(int64_t input_len, 
const uint8_t* input,
 
     if (!size_only) {
       int64_t remaining_output_size = *output_len - uncompressed_total_len;
-      DCHECK_GE(remaining_output_size, uncompressed_block_len);
+      if (remaining_output_size < uncompressed_block_len) {
+        return Status(TErrorCode::SNAPPY_DECOMPRESS_DECOMPRESS_SIZE_INCORRECT);
+      }
     }
 
     while (uncompressed_block_len > 0) {
@@ -459,6 +461,11 @@ static Status SnappyBlockDecompress(int64_t input_len, 
const uint8_t* input,
       DCHECK_GT(uncompressed_len, 0);
 
       if (!size_only) {
+        // Check output bounds
+        int64_t remaining_output_size = *output_len - uncompressed_total_len;
+        if (remaining_output_size < uncompressed_len) {
+          return 
Status(TErrorCode::SNAPPY_DECOMPRESS_DECOMPRESS_SIZE_INCORRECT);
+        }
         // Decompress this snappy block
         if (!snappy::RawUncompress(reinterpret_cast<const char*>(input),
                 compressed_len, output)) {
@@ -522,9 +529,12 @@ int64_t SnappyDecompressor::MaxOutputLen(int64_t 
input_len, const uint8_t* input
 
 Status SnappyDecompressor::ProcessBlock(bool output_preallocated, int64_t 
input_length,
     const uint8_t* input, int64_t* output_length, uint8_t** output) {
+  int64_t uncompressed_length = MaxOutputLen(input_length, input);
+  if (uncompressed_length < 0) {
+    return Status(TErrorCode::SNAPPY_DECOMPRESS_UNCOMPRESSED_LENGTH_FAILED);
+  }
+
   if (!output_preallocated) {
-    int64_t uncompressed_length = MaxOutputLen(input_length, input);
-    if (uncompressed_length < 0) return Status("Snappy: GetUncompressedLength 
failed");
     if (!reuse_buffer_ || out_buffer_ == NULL || buffer_length_ < 
uncompressed_length) {
       buffer_length_ = uncompressed_length;
       out_buffer_ = memory_pool_->TryAllocate(buffer_length_);
@@ -537,6 +547,12 @@ Status SnappyDecompressor::ProcessBlock(bool 
output_preallocated, int64_t input_
     }
     *output = out_buffer_;
     *output_length = uncompressed_length;
+  } else {
+    // If the preallocated buffer is too small (e.g. if the file metadata is 
corrupt),
+    // bail out early. Otherwise, this could result in a buffer overrun.
+    if (uncompressed_length > *output_length) {
+      return Status(TErrorCode::SNAPPY_DECOMPRESS_DECOMPRESS_SIZE_INCORRECT);
+    }
   }
 
   if (!snappy::RawUncompress(reinterpret_cast<const char*>(input),

Reply via email to