This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 0c8aee1 ARROW-5921: [C++] Fix multiple nullptr related crashes in IPC
0c8aee1 is described below
commit 0c8aee18e66803b4e2c3e9007c4f553f54c87ff9
Author: Marco Neumann <[email protected]>
AuthorDate: Sat Jul 13 09:35:52 2019 -0500
ARROW-5921: [C++] Fix multiple nullptr related crashes in IPC
Fixes the following known fuzzer crashes:
- 09f72ba2a52b80366ab676364abec850fc668168
- 607e9caa76863a97f2694a769a1ae2fb83c55e02
- cb8cedb6ff8a6f164210c497d91069812ef5d6f8
- f37e71777ad0324b55b99224f2c7ffb0107bdfa2
- fd237566879dc60fff4d956d5fe3533d74a367f3
Issue: ARROW-5921
Benchmark before:
```
Running ./release/arrow-ipc-read-write-benchmark
Run on (4 X 2700 MHz CPU s)
CPU Caches:
L1 Data 32K (x2)
L1 Instruction 32K (x2)
L2 Unified 256K (x2)
L3 Unified 3072K (x1)
-----------------------------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------------------------
WriteRecordBatch/1/real_time 87688 ns 87539 ns 7918
11.1368GB/s
WriteRecordBatch/4/real_time 85367 ns 85253 ns 8100
11.4396GB/s
WriteRecordBatch/16/real_time 91921 ns 91804 ns 7548
10.6239GB/s
WriteRecordBatch/64/real_time 104752 ns 104574 ns 6982
9.3226GB/s
WriteRecordBatch/256/real_time 162785 ns 162496 ns 4222
5.99908GB/s
WriteRecordBatch/1024/real_time 465386 ns 463993 ns 1485
2.09839GB/s
WriteRecordBatch/4096/real_time 1699539 ns 1693003 ns 356
588.395MB/s
WriteRecordBatch/8192/real_time 4211065 ns 4197466 ns 166
237.47MB/s
ReadRecordBatch/1/real_time 885 ns 884 ns 793175
1103.52GB/s
ReadRecordBatch/4/real_time 1658 ns 1657 ns 415874
588.825GB/s
ReadRecordBatch/16/real_time 5023 ns 5020 ns 139792
194.416GB/s
ReadRecordBatch/64/real_time 18926 ns 18911 ns 37075
51.5991GB/s
ReadRecordBatch/256/real_time 97449 ns 97351 ns 7180
10.0213GB/s
ReadRecordBatch/1024/real_time 393441 ns 393068 ns 1761
2.48211GB/s
ReadRecordBatch/4096/real_time 2271884 ns 2265855 ns 309
440.163MB/s
ReadRecordBatch/8192/real_time 4900659 ns 4886188 ns 143
204.054MB/s
```
Benchmark after:
```
Running ./release/arrow-ipc-read-write-benchmark
Run on (4 X 2700 MHz CPU s)
CPU Caches:
L1 Data 32K (x2)
L1 Instruction 32K (x2)
L2 Unified 256K (x2)
L3 Unified 3072K (x1)
-----------------------------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------------------------
WriteRecordBatch/1/real_time 87828 ns 87674 ns 7990
11.1191GB/s
WriteRecordBatch/4/real_time 84026 ns 83867 ns 8467
11.6222GB/s
WriteRecordBatch/16/real_time 88073 ns 87904 ns 7940
11.0881GB/s
WriteRecordBatch/64/real_time 99008 ns 98867 ns 7026
9.8635GB/s
WriteRecordBatch/256/real_time 161226 ns 160809 ns 4357
6.05712GB/s
WriteRecordBatch/1024/real_time 453623 ns 452135 ns 1385
2.15281GB/s
WriteRecordBatch/4096/real_time 1710791 ns 1704345 ns 413
584.525MB/s
WriteRecordBatch/8192/real_time 4220409 ns 4207769 ns 166
236.944MB/s
ReadRecordBatch/1/real_time 861 ns 860 ns 809986
1.10794TB/s
ReadRecordBatch/4/real_time 1664 ns 1663 ns 423113
586.955GB/s
ReadRecordBatch/16/real_time 5122 ns 5118 ns 138673
190.66GB/s
ReadRecordBatch/64/real_time 19298 ns 19286 ns 36599
50.6033GB/s
ReadRecordBatch/256/real_time 97265 ns 97182 ns 7228
10.0402GB/s
ReadRecordBatch/1024/real_time 397791 ns 397433 ns 1756
2.45497GB/s
ReadRecordBatch/4096/real_time 1677272 ns 1673565 ns 409
596.206MB/s
ReadRecordBatch/8192/real_time 4891401 ns 4876719 ns 144
204.44MB/s
```
The benchmarks are really flaky and I cannot see a large difference, so I
think this is OK.
Author: Marco Neumann <[email protected]>
Closes #4862 from crepererum/ARROW-5921 and squashes the following commits:
127bbaf9c <Marco Neumann> update arrow-testing submodule
32070ee0b <Marco Neumann> Fix multiple nullptr related crashes in IPC
---
cpp/src/arrow/ipc/metadata-internal.cc | 35 +++++++++++++++++++++++++++++-----
cpp/src/arrow/ipc/reader.cc | 10 +++++++++-
testing | 2 +-
3 files changed, 40 insertions(+), 7 deletions(-)
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc
b/cpp/src/arrow/ipc/metadata-internal.cc
index 4e1a157..f9b73df 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -357,8 +357,13 @@ static Status TypeFromFlatbuffer(const flatbuf::Field*
field,
const std::vector<std::shared_ptr<Field>>&
children,
const KeyValueMetadata* field_metadata,
std::shared_ptr<DataType>* out) {
+ auto type_data = field->type();
+ if (type_data == nullptr) {
+ return Status::IOError(
+ "Type-pointer in custom metadata of flatbuffer-encoded Field is null.");
+ }
RETURN_NOT_OK(
- ConcreteTypeFromFlatbuffer(field->type_type(), field->type(), children,
out));
+ ConcreteTypeFromFlatbuffer(field->type_type(), type_data, children,
out));
// Look for extension metadata in custom_metadata field
// TODO(wesm): Should this be part of the Field Flatbuffers table?
@@ -758,12 +763,22 @@ Status FieldFromFlatbuffer(const flatbuf::Field* field,
DictionaryMemo* dictiona
// based on the DictionaryEncoding metadata and record in the
// dictionary_memo
std::shared_ptr<DataType> index_type;
- RETURN_NOT_OK(IntFromFlatbuffer(encoding->indexType(), &index_type));
+ auto int_data = encoding->indexType();
+ if (int_data == nullptr) {
+ return Status::IOError(
+ "indexType-pointer in custom metadata of flatbuffer-encoded
DictionaryEncoding "
+ "is null.");
+ }
+ RETURN_NOT_OK(IntFromFlatbuffer(int_data, &index_type));
type = ::arrow::dictionary(index_type, type, encoding->isOrdered());
*out = ::arrow::field(field->name()->str(), type, field->nullable(),
metadata);
RETURN_NOT_OK(dictionary_memo->AddField(encoding->id(), *out));
} else {
- *out = ::arrow::field(field->name()->str(), type, field->nullable(),
metadata);
+ auto name = field->name();
+ if (name == nullptr) {
+ return Status::IOError("Name-pointer of flatbuffer-encoded Field is
null.");
+ }
+ *out = ::arrow::field(name->str(), type, field->nullable(), metadata);
}
return Status::OK();
}
@@ -1137,7 +1152,12 @@ Status GetTensorMetadata(const Buffer& metadata,
std::shared_ptr<DataType>* type
}
}
- return ConcreteTypeFromFlatbuffer(tensor->type_type(), tensor->type(), {},
type);
+ auto type_data = tensor->type();
+ if (type_data == nullptr) {
+ return Status::IOError(
+ "Type-pointer in custom metadata of flatbuffer-encoded Tensor is null.");
+ }
+ return ConcreteTypeFromFlatbuffer(tensor->type_type(), type_data, {}, type);
}
Status GetSparseTensorMetadata(const Buffer& metadata,
std::shared_ptr<DataType>* type,
@@ -1181,7 +1201,12 @@ Status GetSparseTensorMetadata(const Buffer& metadata,
std::shared_ptr<DataType>
return Status::Invalid("Unrecognized sparse index type");
}
- return ConcreteTypeFromFlatbuffer(sparse_tensor->type_type(),
sparse_tensor->type(), {},
+ auto type_data = sparse_tensor->type();
+ if (type_data == nullptr) {
+ return Status::IOError(
+ "Type-pointer in custom metadata of flatbuffer-encoded SparseTensor is
null.");
+ }
+ return ConcreteTypeFromFlatbuffer(sparse_tensor->type_type(), type_data, {},
type);
}
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 002379e..c39f2d7 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -98,7 +98,12 @@ class IpcComponentSource {
: metadata_(metadata), file_(file) {}
Status GetBuffer(int buffer_index, std::shared_ptr<Buffer>* out) {
- const flatbuf::Buffer* buffer = metadata_->buffers()->Get(buffer_index);
+ auto buffers = metadata_->buffers();
+ if (buffers == nullptr) {
+ return Status::IOError(
+ "Buffers-pointer of flatbuffer-encoded RecordBatch is null.");
+ }
+ const flatbuf::Buffer* buffer = buffers->Get(buffer_index);
if (buffer->length() == 0) {
*out = nullptr;
@@ -115,6 +120,9 @@ class IpcComponentSource {
Status GetFieldMetadata(int field_index, ArrayData* out) {
auto nodes = metadata_->nodes();
+ if (nodes == nullptr) {
+ return Status::IOError("Nodes-pointer of flatbuffer-encoded Table is
null.");
+ }
// pop off a field
if (field_index >= static_cast<int>(nodes->size())) {
return Status::Invalid("Ran out of field metadata, likely malformed");
diff --git a/testing b/testing
index a674dac..d14764e 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit a674dac190c5fc626964c9b611c67552fa2e530d
+Subproject commit d14764eff71c51156bea2a7860f8df811d6c9f11