This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch maint-0.14.x in repository https://gitbox.apache.org/repos/asf/arrow.git
commit de7d5ffe1634037392e698815242353d73796911 Author: Marco Neumann <[email protected]> AuthorDate: Sat Jul 13 09:35:52 2019 -0500 ARROW-5921: [C++] Fix multiple nullptr related crashes in IPC Fixes the following known fuzzer crashes: - 09f72ba2a52b80366ab676364abec850fc668168 - 607e9caa76863a97f2694a769a1ae2fb83c55e02 - cb8cedb6ff8a6f164210c497d91069812ef5d6f8 - f37e71777ad0324b55b99224f2c7ffb0107bdfa2 - fd237566879dc60fff4d956d5fe3533d74a367f3 Issue: ARROW-5921 Benchmark before: ``` Running ./release/arrow-ipc-read-write-benchmark Run on (4 X 2700 MHz CPU s) CPU Caches: L1 Data 32K (x2) L1 Instruction 32K (x2) L2 Unified 256K (x2) L3 Unified 3072K (x1) ----------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------- WriteRecordBatch/1/real_time 87688 ns 87539 ns 7918 11.1368GB/s WriteRecordBatch/4/real_time 85367 ns 85253 ns 8100 11.4396GB/s WriteRecordBatch/16/real_time 91921 ns 91804 ns 7548 10.6239GB/s WriteRecordBatch/64/real_time 104752 ns 104574 ns 6982 9.3226GB/s WriteRecordBatch/256/real_time 162785 ns 162496 ns 4222 5.99908GB/s WriteRecordBatch/1024/real_time 465386 ns 463993 ns 1485 2.09839GB/s WriteRecordBatch/4096/real_time 1699539 ns 1693003 ns 356 588.395MB/s WriteRecordBatch/8192/real_time 4211065 ns 4197466 ns 166 237.47MB/s ReadRecordBatch/1/real_time 885 ns 884 ns 793175 1103.52GB/s ReadRecordBatch/4/real_time 1658 ns 1657 ns 415874 588.825GB/s ReadRecordBatch/16/real_time 5023 ns 5020 ns 139792 194.416GB/s ReadRecordBatch/64/real_time 18926 ns 18911 ns 37075 51.5991GB/s ReadRecordBatch/256/real_time 97449 ns 97351 ns 7180 10.0213GB/s ReadRecordBatch/1024/real_time 393441 ns 393068 ns 1761 2.48211GB/s ReadRecordBatch/4096/real_time 2271884 ns 2265855 ns 309 440.163MB/s ReadRecordBatch/8192/real_time 4900659 ns 4886188 ns 143 204.054MB/s ``` Benchmark after: ``` Running ./release/arrow-ipc-read-write-benchmark Run on (4 X 2700 MHz CPU s) CPU Caches: L1 Data 32K (x2) L1 Instruction 32K (x2) L2 Unified 256K (x2) L3 Unified 3072K (x1) ----------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------- WriteRecordBatch/1/real_time 87828 ns 87674 ns 7990 11.1191GB/s WriteRecordBatch/4/real_time 84026 ns 83867 ns 8467 11.6222GB/s WriteRecordBatch/16/real_time 88073 ns 87904 ns 7940 11.0881GB/s WriteRecordBatch/64/real_time 99008 ns 98867 ns 7026 9.8635GB/s WriteRecordBatch/256/real_time 161226 ns 160809 ns 4357 6.05712GB/s WriteRecordBatch/1024/real_time 453623 ns 452135 ns 1385 2.15281GB/s WriteRecordBatch/4096/real_time 1710791 ns 1704345 ns 413 584.525MB/s WriteRecordBatch/8192/real_time 4220409 ns 4207769 ns 166 236.944MB/s ReadRecordBatch/1/real_time 861 ns 860 ns 809986 1.10794TB/s ReadRecordBatch/4/real_time 1664 ns 1663 ns 423113 586.955GB/s ReadRecordBatch/16/real_time 5122 ns 5118 ns 138673 190.66GB/s ReadRecordBatch/64/real_time 19298 ns 19286 ns 36599 50.6033GB/s ReadRecordBatch/256/real_time 97265 ns 97182 ns 7228 10.0402GB/s ReadRecordBatch/1024/real_time 397791 ns 397433 ns 1756 2.45497GB/s ReadRecordBatch/4096/real_time 1677272 ns 1673565 ns 409 596.206MB/s ReadRecordBatch/8192/real_time 4891401 ns 4876719 ns 144 204.44MB/s ``` The benchmarks are really flaky and I cannot see a large difference, so I think this is OK. Author: Marco Neumann <[email protected]> Closes #4862 from crepererum/ARROW-5921 and squashes the following commits: 127bbaf9c <Marco Neumann> update arrow-testing submodule 32070ee0b <Marco Neumann> Fix multiple nullptr related crashes in IPC --- cpp/src/arrow/ipc/metadata-internal.cc | 35 +++++++++++++++++++++++++++++----- cpp/src/arrow/ipc/reader.cc | 10 +++++++++- testing | 2 +- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index 4e1a157..f9b73df 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -357,8 +357,13 @@ static Status TypeFromFlatbuffer(const flatbuf::Field* field, const std::vector<std::shared_ptr<Field>>& children, const KeyValueMetadata* field_metadata, std::shared_ptr<DataType>* out) { + auto type_data = field->type(); + if (type_data == nullptr) { + return Status::IOError( + "Type-pointer in custom metadata of flatbuffer-encoded Field is null."); + } RETURN_NOT_OK( - ConcreteTypeFromFlatbuffer(field->type_type(), field->type(), children, out)); + ConcreteTypeFromFlatbuffer(field->type_type(), type_data, children, out)); // Look for extension metadata in custom_metadata field // TODO(wesm): Should this be part of the Field Flatbuffers table? @@ -758,12 +763,22 @@ Status FieldFromFlatbuffer(const flatbuf::Field* field, DictionaryMemo* dictiona // based on the DictionaryEncoding metadata and record in the // dictionary_memo std::shared_ptr<DataType> index_type; - RETURN_NOT_OK(IntFromFlatbuffer(encoding->indexType(), &index_type)); + auto int_data = encoding->indexType(); + if (int_data == nullptr) { + return Status::IOError( + "indexType-pointer in custom metadata of flatbuffer-encoded DictionaryEncoding " + "is null."); + } + RETURN_NOT_OK(IntFromFlatbuffer(int_data, &index_type)); type = ::arrow::dictionary(index_type, type, encoding->isOrdered()); *out = ::arrow::field(field->name()->str(), type, field->nullable(), metadata); RETURN_NOT_OK(dictionary_memo->AddField(encoding->id(), *out)); } else { - *out = ::arrow::field(field->name()->str(), type, field->nullable(), metadata); + auto name = field->name(); + if (name == nullptr) { + return Status::IOError("Name-pointer of flatbuffer-encoded Field is null."); + } + *out = ::arrow::field(name->str(), type, field->nullable(), metadata); } return Status::OK(); } @@ -1137,7 +1152,12 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type } } - return ConcreteTypeFromFlatbuffer(tensor->type_type(), tensor->type(), {}, type); + auto type_data = tensor->type(); + if (type_data == nullptr) { + return Status::IOError( + "Type-pointer in custom metadata of flatbuffer-encoded Tensor is null."); + } + return ConcreteTypeFromFlatbuffer(tensor->type_type(), type_data, {}, type); } Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type, @@ -1181,7 +1201,12 @@ Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType> return Status::Invalid("Unrecognized sparse index type"); } - return ConcreteTypeFromFlatbuffer(sparse_tensor->type_type(), sparse_tensor->type(), {}, + auto type_data = sparse_tensor->type(); + if (type_data == nullptr) { + return Status::IOError( + "Type-pointer in custom metadata of flatbuffer-encoded SparseTensor is null."); + } + return ConcreteTypeFromFlatbuffer(sparse_tensor->type_type(), type_data, {}, type); } diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index 002379e..c39f2d7 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -98,7 +98,12 @@ class IpcComponentSource { : metadata_(metadata), file_(file) {} Status GetBuffer(int buffer_index, std::shared_ptr<Buffer>* out) { - const flatbuf::Buffer* buffer = metadata_->buffers()->Get(buffer_index); + auto buffers = metadata_->buffers(); + if (buffers == nullptr) { + return Status::IOError( + "Buffers-pointer of flatbuffer-encoded RecordBatch is null."); + } + const flatbuf::Buffer* buffer = buffers->Get(buffer_index); if (buffer->length() == 0) { *out = nullptr; @@ -115,6 +120,9 @@ class IpcComponentSource { Status GetFieldMetadata(int field_index, ArrayData* out) { auto nodes = metadata_->nodes(); + if (nodes == nullptr) { + return Status::IOError("Nodes-pointer of flatbuffer-encoded Table is null."); + } // pop off a field if (field_index >= static_cast<int>(nodes->size())) { return Status::Invalid("Ran out of field metadata, likely malformed"); diff --git a/testing b/testing index a674dac..d14764e 160000 --- a/testing +++ b/testing @@ -1 +1 @@ -Subproject commit a674dac190c5fc626964c9b611c67552fa2e530d +Subproject commit d14764eff71c51156bea2a7860f8df811d6c9f11
