[ 
https://issues.apache.org/jira/browse/ARROW-2222?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16446832#comment-16446832
 ] 

ASF GitHub Bot commented on ARROW-2222:
---------------------------------------

xhochy closed pull request #1763: ARROW-2222: handle untrusted inputs
URL: https://github.com/apache/arrow/pull/1763
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/cpp/src/arrow/io/memory.cc b/cpp/src/arrow/io/memory.cc
index 45d6bdbde3..512e7f513e 100644
--- a/cpp/src/arrow/io/memory.cc
+++ b/cpp/src/arrow/io/memory.cc
@@ -257,6 +257,9 @@ Status BufferReader::Tell(int64_t* position) const {
 bool BufferReader::supports_zero_copy() const { return true; }
 
 Status BufferReader::Read(int64_t nbytes, int64_t* bytes_read, void* buffer) {
+  if (nbytes < 0) {
+    return Status::IOError("Cannot read a negative number of bytes from 
BufferReader.");
+  }
   *bytes_read = std::min(nbytes, size_ - position_);
   if (*bytes_read) {
     memcpy(buffer, data_ + position_, *bytes_read);
@@ -266,6 +269,9 @@ Status BufferReader::Read(int64_t nbytes, int64_t* 
bytes_read, void* buffer) {
 }
 
 Status BufferReader::Read(int64_t nbytes, std::shared_ptr<Buffer>* out) {
+  if (nbytes < 0) {
+    return Status::IOError("Cannot read a negative number of bytes from 
BufferReader.");
+  }
   int64_t size = std::min(nbytes, size_ - position_);
 
   if (size > 0 && buffer_ != nullptr) {
diff --git a/cpp/src/arrow/ipc/message.cc b/cpp/src/arrow/ipc/message.cc
index c14fb49274..0a5bcdc70f 100644
--- a/cpp/src/arrow/ipc/message.cc
+++ b/cpp/src/arrow/ipc/message.cc
@@ -136,7 +136,12 @@ bool Message::Equals(const Message& other) const {
 
 Status Message::ReadFrom(const std::shared_ptr<Buffer>& metadata, 
io::InputStream* stream,
                          std::unique_ptr<Message>* out) {
-  auto fb_message = flatbuf::GetMessage(metadata->data());
+  auto data = metadata->data();
+  flatbuffers::Verifier verifier(data, metadata->size(), 128);
+  if (!flatbuf::VerifyMessageBuffer(verifier)) {
+    return Status::IOError("Invalid flatbuffers message.");
+  }
+  auto fb_message = flatbuf::GetMessage(data);
 
   int64_t body_length = fb_message->bodyLength();
 
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc 
b/cpp/src/arrow/ipc/metadata-internal.cc
index af1d6c8515..d54323919d 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -817,6 +817,9 @@ static Status VisitField(const flatbuf::Field* field, 
DictionaryTypeMap* id_to_f
   if (dict_metadata == nullptr) {
     // Field is not dictionary encoded. Visit children
     auto children = field->children();
+    if (children == nullptr) {
+      return Status::IOError("Children-pointer of flatbuffer-encoded Field is 
null.");
+    }
     for (flatbuffers::uoffset_t i = 0; i < children->size(); ++i) {
       RETURN_NOT_OK(VisitField(children->Get(i), id_to_field));
     }
@@ -832,9 +835,16 @@ static Status VisitField(const flatbuf::Field* field, 
DictionaryTypeMap* id_to_f
 
 Status GetDictionaryTypes(const void* opaque_schema, DictionaryTypeMap* 
id_to_field) {
   auto schema = static_cast<const flatbuf::Schema*>(opaque_schema);
+  if (schema->fields() == nullptr) {
+    return Status::IOError("Fields-pointer of flatbuffer-encoded Schema is 
null.");
+  }
   int num_fields = static_cast<int>(schema->fields()->size());
   for (int i = 0; i < num_fields; ++i) {
-    RETURN_NOT_OK(VisitField(schema->fields()->Get(i), id_to_field));
+    auto field = schema->fields()->Get(i);
+    if (field == nullptr) {
+      return Status::IOError("Field-pointer of flatbuffer-encoded Schema is 
null.");
+    }
+    RETURN_NOT_OK(VisitField(field, id_to_field));
   }
   return Status::OK();
 }
@@ -842,6 +852,9 @@ Status GetDictionaryTypes(const void* opaque_schema, 
DictionaryTypeMap* id_to_fi
 Status GetSchema(const void* opaque_schema, const DictionaryMemo& 
dictionary_memo,
                  std::shared_ptr<Schema>* out) {
   auto schema = static_cast<const flatbuf::Schema*>(opaque_schema);
+  if (schema->fields() == nullptr) {
+    return Status::IOError("Fields-pointer of flatbuffer-encoded Schema is 
null.");
+  }
   int num_fields = static_cast<int>(schema->fields()->size());
 
   std::vector<std::shared_ptr<Field>> fields(num_fields);
@@ -855,6 +868,14 @@ Status GetSchema(const void* opaque_schema, const 
DictionaryMemo& dictionary_mem
   if (fb_metadata != nullptr) {
     metadata->reserve(fb_metadata->size());
     for (const auto& pair : *fb_metadata) {
+      if (pair->key() == nullptr) {
+        return Status::IOError(
+            "Key-pointer in custom metadata of flatbuffer-encoded Schema is 
null.");
+      }
+      if (pair->value() == nullptr) {
+        return Status::IOError(
+            "Value-pointer in custom metadata of flatbuffer-encoded Schema is 
null.");
+      }
       metadata->Append(pair->key()->str(), pair->value()->str());
     }
   }
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 6441bc33cf..5ffbe6f40f 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -328,6 +328,9 @@ Status ReadRecordBatch(const Buffer& metadata, const 
std::shared_ptr<Schema>& sc
   if (message->header_type() != flatbuf::MessageHeader_RecordBatch) {
     DCHECK_EQ(message->header_type(), flatbuf::MessageHeader_RecordBatch);
   }
+  if (message->header() == nullptr) {
+    return Status::IOError("Header-pointer of flatbuffer-encoded Message is 
null.");
+  }
   auto batch = reinterpret_cast<const 
flatbuf::RecordBatch*>(message->header());
   return ReadRecordBatch(batch, schema, max_recursion_depth, file, out);
 }
@@ -426,6 +429,9 @@ class RecordBatchStreamReader::RecordBatchStreamReaderImpl {
     RETURN_NOT_OK(
         ReadMessageAndValidate(message_reader_.get(), Message::SCHEMA, false, 
&message));
 
+    if (message->header() == nullptr) {
+      return Status::IOError("Header-pointer of flatbuffer-encoded Message is 
null.");
+    }
     RETURN_NOT_OK(internal::GetDictionaryTypes(message->header(), 
&dictionary_types_));
 
     // TODO(wesm): In future, we may want to reconcile the ids in the stream 
with


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> [C++] Add option to validate Flatbuffers messages
> -------------------------------------------------
>
>                 Key: ARROW-2222
>                 URL: https://issues.apache.org/jira/browse/ARROW-2222
>             Project: Apache Arrow
>          Issue Type: Improvement
>          Components: C++
>            Reporter: Wes McKinney
>            Assignee: Marco Neumann
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 0.10.0
>
>
> This is follow up work to ARROW-1589, ARROW-2023, and can be validated by the 
> {{ipc-fuzzer-test}}. Users receiving untrusted input streams can prevent 
> segfaults this way
> As part of this, we should quantify the overhead associated with message 
> validation in regular use



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to