This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 0c8aee1  ARROW-5921: [C++] Fix multiple nullptr related crashes in IPC
0c8aee1 is described below

commit 0c8aee18e66803b4e2c3e9007c4f553f54c87ff9
Author: Marco Neumann <[email protected]>
AuthorDate: Sat Jul 13 09:35:52 2019 -0500

    ARROW-5921: [C++] Fix multiple nullptr related crashes in IPC
    
    Fixes the following known fuzzer crashes:
    
    - 09f72ba2a52b80366ab676364abec850fc668168
    - 607e9caa76863a97f2694a769a1ae2fb83c55e02
    - cb8cedb6ff8a6f164210c497d91069812ef5d6f8
    - f37e71777ad0324b55b99224f2c7ffb0107bdfa2
    - fd237566879dc60fff4d956d5fe3533d74a367f3
    
    Issue: ARROW-5921
    
    Benchmark before:
    
    ```
    Running ./release/arrow-ipc-read-write-benchmark
    Run on (4 X 2700 MHz CPU s)
    CPU Caches:
      L1 Data 32K (x2)
      L1 Instruction 32K (x2)
      L2 Unified 256K (x2)
      L3 Unified 3072K (x1)
    -----------------------------------------------------------------------
    Benchmark                                Time           CPU Iterations
    -----------------------------------------------------------------------
    WriteRecordBatch/1/real_time         87688 ns      87539 ns       7918   
11.1368GB/s
    WriteRecordBatch/4/real_time         85367 ns      85253 ns       8100   
11.4396GB/s
    WriteRecordBatch/16/real_time        91921 ns      91804 ns       7548   
10.6239GB/s
    WriteRecordBatch/64/real_time       104752 ns     104574 ns       6982    
9.3226GB/s
    WriteRecordBatch/256/real_time      162785 ns     162496 ns       4222   
5.99908GB/s
    WriteRecordBatch/1024/real_time     465386 ns     463993 ns       1485   
2.09839GB/s
    WriteRecordBatch/4096/real_time    1699539 ns    1693003 ns        356   
588.395MB/s
    WriteRecordBatch/8192/real_time    4211065 ns    4197466 ns        166    
237.47MB/s
    ReadRecordBatch/1/real_time            885 ns        884 ns     793175   
1103.52GB/s
    ReadRecordBatch/4/real_time           1658 ns       1657 ns     415874   
588.825GB/s
    ReadRecordBatch/16/real_time          5023 ns       5020 ns     139792   
194.416GB/s
    ReadRecordBatch/64/real_time         18926 ns      18911 ns      37075   
51.5991GB/s
    ReadRecordBatch/256/real_time        97449 ns      97351 ns       7180   
10.0213GB/s
    ReadRecordBatch/1024/real_time      393441 ns     393068 ns       1761   
2.48211GB/s
    ReadRecordBatch/4096/real_time     2271884 ns    2265855 ns        309   
440.163MB/s
    ReadRecordBatch/8192/real_time     4900659 ns    4886188 ns        143   
204.054MB/s
    ```
    
    Benchmark after:
    
    ```
    Running ./release/arrow-ipc-read-write-benchmark
    Run on (4 X 2700 MHz CPU s)
    CPU Caches:
      L1 Data 32K (x2)
      L1 Instruction 32K (x2)
      L2 Unified 256K (x2)
      L3 Unified 3072K (x1)
    -----------------------------------------------------------------------
    Benchmark                                Time           CPU Iterations
    -----------------------------------------------------------------------
    WriteRecordBatch/1/real_time         87828 ns      87674 ns       7990   
11.1191GB/s
    WriteRecordBatch/4/real_time         84026 ns      83867 ns       8467   
11.6222GB/s
    WriteRecordBatch/16/real_time        88073 ns      87904 ns       7940   
11.0881GB/s
    WriteRecordBatch/64/real_time        99008 ns      98867 ns       7026    
9.8635GB/s
    WriteRecordBatch/256/real_time      161226 ns     160809 ns       4357   
6.05712GB/s
    WriteRecordBatch/1024/real_time     453623 ns     452135 ns       1385   
2.15281GB/s
    WriteRecordBatch/4096/real_time    1710791 ns    1704345 ns        413   
584.525MB/s
    WriteRecordBatch/8192/real_time    4220409 ns    4207769 ns        166   
236.944MB/s
    ReadRecordBatch/1/real_time            861 ns        860 ns     809986   
1.10794TB/s
    ReadRecordBatch/4/real_time           1664 ns       1663 ns     423113   
586.955GB/s
    ReadRecordBatch/16/real_time          5122 ns       5118 ns     138673    
190.66GB/s
    ReadRecordBatch/64/real_time         19298 ns      19286 ns      36599   
50.6033GB/s
    ReadRecordBatch/256/real_time        97265 ns      97182 ns       7228   
10.0402GB/s
    ReadRecordBatch/1024/real_time      397791 ns     397433 ns       1756   
2.45497GB/s
    ReadRecordBatch/4096/real_time     1677272 ns    1673565 ns        409   
596.206MB/s
    ReadRecordBatch/8192/real_time     4891401 ns    4876719 ns        144    
204.44MB/s
    ```
    
    The benchmarks are really flaky and I cannot see a large difference, so I 
think this is OK.
    
    Author: Marco Neumann <[email protected]>
    
    Closes #4862 from crepererum/ARROW-5921 and squashes the following commits:
    
    127bbaf9c <Marco Neumann> update arrow-testing submodule
    32070ee0b <Marco Neumann> Fix multiple nullptr related crashes in IPC
---
 cpp/src/arrow/ipc/metadata-internal.cc | 35 +++++++++++++++++++++++++++++-----
 cpp/src/arrow/ipc/reader.cc            | 10 +++++++++-
 testing                                |  2 +-
 3 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/cpp/src/arrow/ipc/metadata-internal.cc 
b/cpp/src/arrow/ipc/metadata-internal.cc
index 4e1a157..f9b73df 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -357,8 +357,13 @@ static Status TypeFromFlatbuffer(const flatbuf::Field* 
field,
                                  const std::vector<std::shared_ptr<Field>>& 
children,
                                  const KeyValueMetadata* field_metadata,
                                  std::shared_ptr<DataType>* out) {
+  auto type_data = field->type();
+  if (type_data == nullptr) {
+    return Status::IOError(
+      "Type-pointer in custom metadata of flatbuffer-encoded Field is null.");
+  }
   RETURN_NOT_OK(
-      ConcreteTypeFromFlatbuffer(field->type_type(), field->type(), children, 
out));
+      ConcreteTypeFromFlatbuffer(field->type_type(), type_data, children, 
out));
 
   // Look for extension metadata in custom_metadata field
   // TODO(wesm): Should this be part of the Field Flatbuffers table?
@@ -758,12 +763,22 @@ Status FieldFromFlatbuffer(const flatbuf::Field* field, 
DictionaryMemo* dictiona
     // based on the DictionaryEncoding metadata and record in the
     // dictionary_memo
     std::shared_ptr<DataType> index_type;
-    RETURN_NOT_OK(IntFromFlatbuffer(encoding->indexType(), &index_type));
+    auto int_data = encoding->indexType();
+    if (int_data == nullptr) {
+      return Status::IOError(
+        "indexType-pointer in custom metadata of flatbuffer-encoded 
DictionaryEncoding "
+        "is null.");
+    }
+    RETURN_NOT_OK(IntFromFlatbuffer(int_data, &index_type));
     type = ::arrow::dictionary(index_type, type, encoding->isOrdered());
     *out = ::arrow::field(field->name()->str(), type, field->nullable(), 
metadata);
     RETURN_NOT_OK(dictionary_memo->AddField(encoding->id(), *out));
   } else {
-    *out = ::arrow::field(field->name()->str(), type, field->nullable(), 
metadata);
+    auto name = field->name();
+    if (name == nullptr) {
+      return Status::IOError("Name-pointer of flatbuffer-encoded Field is 
null.");
+    }
+    *out = ::arrow::field(name->str(), type, field->nullable(), metadata);
   }
   return Status::OK();
 }
@@ -1137,7 +1152,12 @@ Status GetTensorMetadata(const Buffer& metadata, 
std::shared_ptr<DataType>* type
     }
   }
 
-  return ConcreteTypeFromFlatbuffer(tensor->type_type(), tensor->type(), {}, 
type);
+  auto type_data = tensor->type();
+  if (type_data == nullptr) {
+    return Status::IOError(
+      "Type-pointer in custom metadata of flatbuffer-encoded Tensor is null.");
+  }
+  return ConcreteTypeFromFlatbuffer(tensor->type_type(), type_data, {}, type);
 }
 
 Status GetSparseTensorMetadata(const Buffer& metadata, 
std::shared_ptr<DataType>* type,
@@ -1181,7 +1201,12 @@ Status GetSparseTensorMetadata(const Buffer& metadata, 
std::shared_ptr<DataType>
       return Status::Invalid("Unrecognized sparse index type");
   }
 
-  return ConcreteTypeFromFlatbuffer(sparse_tensor->type_type(), 
sparse_tensor->type(), {},
+  auto type_data = sparse_tensor->type();
+  if (type_data == nullptr) {
+    return Status::IOError(
+      "Type-pointer in custom metadata of flatbuffer-encoded SparseTensor is 
null.");
+  }
+  return ConcreteTypeFromFlatbuffer(sparse_tensor->type_type(), type_data, {},
                                     type);
 }
 
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 002379e..c39f2d7 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -98,7 +98,12 @@ class IpcComponentSource {
       : metadata_(metadata), file_(file) {}
 
   Status GetBuffer(int buffer_index, std::shared_ptr<Buffer>* out) {
-    const flatbuf::Buffer* buffer = metadata_->buffers()->Get(buffer_index);
+    auto buffers = metadata_->buffers();
+    if (buffers == nullptr) {
+      return Status::IOError(
+          "Buffers-pointer of flatbuffer-encoded RecordBatch is null.");
+    }
+    const flatbuf::Buffer* buffer = buffers->Get(buffer_index);
 
     if (buffer->length() == 0) {
       *out = nullptr;
@@ -115,6 +120,9 @@ class IpcComponentSource {
 
   Status GetFieldMetadata(int field_index, ArrayData* out) {
     auto nodes = metadata_->nodes();
+    if (nodes == nullptr) {
+      return Status::IOError("Nodes-pointer of flatbuffer-encoded Table is 
null.");
+    }
     // pop off a field
     if (field_index >= static_cast<int>(nodes->size())) {
       return Status::Invalid("Ran out of field metadata, likely malformed");
diff --git a/testing b/testing
index a674dac..d14764e 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit a674dac190c5fc626964c9b611c67552fa2e530d
+Subproject commit d14764eff71c51156bea2a7860f8df811d6c9f11

Reply via email to