Repository: arrow Updated Branches: refs/heads/master 957a0e678 -> 4938d8d7c
ARROW-726: [C++] Fix segfault caused when passing non-buffer object to arrow::py::PyBuffer This leads to calling `Py_DECREF` on a null pointer Author: Wes McKinney <wes.mckin...@twosigma.com> Closes #459 from wesm/ARROW-726 and squashes the following commits: a764134 [Wes McKinney] Fix segfault caused when passing non-buffer object to arrow::py::PyBuffer. Fix some compiler warnings Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/4938d8d7 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/4938d8d7 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/4938d8d7 Branch: refs/heads/master Commit: 4938d8d7cea65d039650f684afaa29a74510c3e0 Parents: 957a0e6 Author: Wes McKinney <wes.mckin...@twosigma.com> Authored: Thu Mar 30 18:22:11 2017 -0400 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Thu Mar 30 18:22:11 2017 -0400 ---------------------------------------------------------------------- cpp/src/arrow/python/builtin_convert.cc | 4 ++-- cpp/src/arrow/python/common.cc | 4 ++-- cpp/src/arrow/python/pandas-test.cc | 10 ++++++++-- cpp/src/arrow/python/pandas_convert.cc | 10 +++++----- cpp/src/arrow/python/pandas_convert.h | 2 +- 5 files changed, 18 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/4938d8d7/cpp/src/arrow/python/builtin_convert.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc index 9acccc1..6e59845 100644 --- a/cpp/src/arrow/python/builtin_convert.cc +++ b/cpp/src/arrow/python/builtin_convert.cc @@ -390,7 +390,7 @@ class BytesConverter : public TypedConverter<BinaryBuilder> { // No error checking length = PyBytes_GET_SIZE(bytes_obj); bytes = PyBytes_AS_STRING(bytes_obj); - RETURN_NOT_OK(typed_builder_->Append(bytes, length)); + RETURN_NOT_OK(typed_builder_->Append(bytes, static_cast<int32_t>(length))); } return Status::OK(); } @@ -422,7 +422,7 @@ class UTF8Converter : public TypedConverter<StringBuilder> { // No error checking length = PyBytes_GET_SIZE(bytes_obj); bytes = PyBytes_AS_STRING(bytes_obj); - RETURN_NOT_OK(typed_builder_->Append(bytes, length)); + RETURN_NOT_OK(typed_builder_->Append(bytes, static_cast<int32_t>(length))); } return Status::OK(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/4938d8d7/cpp/src/arrow/python/common.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc index a5aea30..717cb5c 100644 --- a/cpp/src/arrow/python/common.cc +++ b/cpp/src/arrow/python/common.cc @@ -47,7 +47,7 @@ MemoryPool* get_memory_pool() { // ---------------------------------------------------------------------- // PyBuffer -PyBuffer::PyBuffer(PyObject* obj) : Buffer(nullptr, 0) { +PyBuffer::PyBuffer(PyObject* obj) : Buffer(nullptr, 0), obj_(nullptr) { if (PyObject_CheckBuffer(obj)) { obj_ = PyMemoryView_FromObject(obj); Py_buffer* buffer = PyMemoryView_GET_BUFFER(obj_); @@ -61,7 +61,7 @@ PyBuffer::PyBuffer(PyObject* obj) : Buffer(nullptr, 0) { PyBuffer::~PyBuffer() { PyAcquireGIL lock; - Py_DECREF(obj_); + Py_XDECREF(obj_); } } // namespace py http://git-wip-us.apache.org/repos/asf/arrow/blob/4938d8d7/cpp/src/arrow/python/pandas-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/pandas-test.cc b/cpp/src/arrow/python/pandas-test.cc index 0d643df..a4e640b 100644 --- a/cpp/src/arrow/python/pandas-test.cc +++ b/cpp/src/arrow/python/pandas-test.cc @@ -24,20 +24,26 @@ #include "arrow/array.h" #include "arrow/builder.h" -#include "arrow/python/pandas_convert.h" #include "arrow/table.h" #include "arrow/test-util.h" #include "arrow/type.h" +#include "arrow/python/common.h" +#include "arrow/python/pandas_convert.h" + namespace arrow { namespace py { +TEST(PyBuffer, InvalidInputObject) { + PyBuffer buffer(Py_None); +} + TEST(PandasConversionTest, TestObjectBlockWriteFails) { StringBuilder builder(default_memory_pool()); const char value[] = {'\xf1', '\0'}; for (int i = 0; i < 1000; ++i) { - builder.Append(value, strlen(value)); + builder.Append(value, static_cast<int32_t>(strlen(value))); } std::shared_ptr<Array> arr; http://git-wip-us.apache.org/repos/asf/arrow/blob/4938d8d7/cpp/src/arrow/python/pandas_convert.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/pandas_convert.cc b/cpp/src/arrow/python/pandas_convert.cc index 685b1f4..db2e90e 100644 --- a/cpp/src/arrow/python/pandas_convert.cc +++ b/cpp/src/arrow/python/pandas_convert.cc @@ -159,13 +159,13 @@ Status AppendObjectStrings(StringBuilder& string_builder, PyObject** objects, PyErr_Clear(); return Status::TypeError("failed converting unicode to UTF8"); } - const int64_t length = PyBytes_GET_SIZE(obj); + const int32_t length = static_cast<int32_t>(PyBytes_GET_SIZE(obj)); Status s = string_builder.Append(PyBytes_AS_STRING(obj), length); Py_DECREF(obj); if (!s.ok()) { return s; } } else if (PyBytes_Check(obj)) { *have_bytes = true; - const int64_t length = PyBytes_GET_SIZE(obj); + const int32_t length = static_cast<int32_t>(PyBytes_GET_SIZE(obj)); RETURN_NOT_OK(string_builder.Append(PyBytes_AS_STRING(obj), length)); } else { string_builder.AppendNull(); @@ -235,7 +235,7 @@ class PandasConverter : public TypeVisitor { } Status InitNullBitmap() { - int null_bytes = BitUtil::BytesForBits(length_); + int64_t null_bytes = BitUtil::BytesForBits(length_); null_bitmap_ = std::make_shared<PoolBuffer>(pool_); RETURN_NOT_OK(null_bitmap_->Resize(null_bytes)); @@ -357,7 +357,7 @@ inline Status PandasConverter::ConvertData(std::shared_ptr<Buffer>* data) { template <> inline Status PandasConverter::ConvertData<BooleanType>(std::shared_ptr<Buffer>* data) { - int nbytes = BitUtil::BytesForBits(length_); + int64_t nbytes = BitUtil::BytesForBits(length_); auto buffer = std::make_shared<PoolBuffer>(pool_); RETURN_NOT_OK(buffer->Resize(nbytes)); @@ -423,7 +423,7 @@ Status PandasConverter::ConvertBooleans(std::shared_ptr<Array>* out) { PyObject** objects = reinterpret_cast<PyObject**>(PyArray_DATA(arr_)); - int nbytes = BitUtil::BytesForBits(length_); + int64_t nbytes = BitUtil::BytesForBits(length_); auto data = std::make_shared<PoolBuffer>(pool_); RETURN_NOT_OK(data->Resize(nbytes)); uint8_t* bitmap = data->mutable_data(); http://git-wip-us.apache.org/repos/asf/arrow/blob/4938d8d7/cpp/src/arrow/python/pandas_convert.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/pandas_convert.h b/cpp/src/arrow/python/pandas_convert.h index a33741e..12644d9 100644 --- a/cpp/src/arrow/python/pandas_convert.h +++ b/cpp/src/arrow/python/pandas_convert.h @@ -31,7 +31,7 @@ namespace arrow { class Array; class Column; -class DataType; +struct DataType; class MemoryPool; class Status; class Table;