This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new a28a198 ARROW-2389: [C++] Add CapacityError
a28a198 is described below
commit a28a19829609f7e40123a87be9e7659cde54aa16
Author: Antoine Pitrou <[email protected]>
AuthorDate: Mon May 7 13:37:14 2018 +0200
ARROW-2389: [C++] Add CapacityError
This error signals an attempt to exceed capacity of a buffer or container.
I initially thought I'd call this `OverflowError` but `CapacityError` makes it
clearer that it's not about overflow on arithmetic operations, IMHO.
Author: Antoine Pitrou <[email protected]>
Closes #1991 from pitrou/ARROW-2389-capacity-error and squashes the
following commits:
afc037d <Antoine Pitrou> ARROW-2389: Add CapacityError
---
cpp/src/arrow/builder.cc | 7 ++--
cpp/src/arrow/ipc/writer.cc | 2 +-
cpp/src/arrow/python/common.cc | 65 +++++++++++++++++-----------------
cpp/src/arrow/python/common.h | 13 +++++--
cpp/src/arrow/python/helpers.cc | 6 ++--
cpp/src/arrow/python/numpy_to_arrow.cc | 6 ++--
cpp/src/arrow/status.h | 6 ++++
python/pyarrow/error.pxi | 6 ++++
python/pyarrow/includes/common.pxd | 1 +
9 files changed, 67 insertions(+), 45 deletions(-)
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 39363fd..6be35fe 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -1307,7 +1307,7 @@ Status ListBuilder::AppendNextOffset() {
std::stringstream ss;
ss << "ListArray cannot contain more then INT32_MAX - 1 child elements,"
<< " have " << num_values;
- return Status::Invalid(ss.str());
+ return Status::CapacityError(ss.str());
}
return offsets_builder_.Append(static_cast<int32_t>(num_values));
}
@@ -1386,7 +1386,8 @@ Status BinaryBuilder::Resize(int64_t capacity) {
Status BinaryBuilder::ReserveData(int64_t elements) {
if (value_data_length() + elements > value_data_capacity()) {
if (value_data_length() + elements > kBinaryMemoryLimit) {
- return Status::Invalid("Cannot reserve capacity larger than 2^31 - 1 for
binary");
+ return Status::CapacityError(
+ "Cannot reserve capacity larger than 2^31 - 1 for binary");
}
RETURN_NOT_OK(value_data_builder_.Reserve(elements));
}
@@ -1399,7 +1400,7 @@ Status BinaryBuilder::AppendNextOffset() {
std::stringstream ss;
ss << "BinaryArray cannot contain more than " << kBinaryMemoryLimit << "
bytes, have "
<< num_bytes;
- return Status::Invalid(ss.str());
+ return Status::CapacityError(ss.str());
}
return offsets_builder_.Append(static_cast<int32_t>(num_bytes));
}
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index 84830e6..71a33d8 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -115,7 +115,7 @@ class RecordBatchSerializer : public ArrayVisitor {
}
if (!allow_64bit_ && arr.length() > std::numeric_limits<int32_t>::max()) {
- return Status::Invalid("Cannot write arrays larger than 2^31 - 1 in
length");
+ return Status::CapacityError("Cannot write arrays larger than 2^31 - 1
in length");
}
// push back all common elements
diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc
index a565d00..7b400e0 100644
--- a/cpp/src/arrow/python/common.cc
+++ b/cpp/src/arrow/python/common.cc
@@ -83,41 +83,40 @@ PyBuffer::~PyBuffer() {
// ----------------------------------------------------------------------
// Python exception -> Status
-Status CheckPyError(StatusCode code) {
- if (PyErr_Occurred()) {
- PyObject* exc_type = nullptr;
- PyObject* exc_value = nullptr;
- PyObject* traceback = nullptr;
-
- PyErr_Fetch(&exc_type, &exc_value, &traceback);
- PyErr_NormalizeException(&exc_type, &exc_value, &traceback);
-
- OwnedRef exc_type_ref(exc_type);
- OwnedRef exc_value_ref(exc_value);
- OwnedRef traceback_ref(traceback);
-
- std::string message;
- RETURN_NOT_OK(internal::PyObject_StdStringStr(exc_value, &message));
-
- if (code == StatusCode::UnknownError) {
- // Try to match the Python exception type with an appropriate Status code
- if (PyErr_GivenExceptionMatches(exc_type, PyExc_MemoryError)) {
- code = StatusCode::OutOfMemory;
- } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_KeyError)) {
- code = StatusCode::KeyError;
- } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_TypeError)) {
- code = StatusCode::TypeError;
- } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_ValueError)) {
- code = StatusCode::Invalid;
- } else if (PyErr_GivenExceptionMatches(exc_type,
PyExc_EnvironmentError)) {
- code = StatusCode::IOError;
- } else if (PyErr_GivenExceptionMatches(exc_type,
PyExc_NotImplementedError)) {
- code = StatusCode::NotImplemented;
- }
+Status ConvertPyError(StatusCode code) {
+ PyObject* exc_type = nullptr;
+ PyObject* exc_value = nullptr;
+ PyObject* traceback = nullptr;
+
+ PyErr_Fetch(&exc_type, &exc_value, &traceback);
+ PyErr_NormalizeException(&exc_type, &exc_value, &traceback);
+
+ DCHECK_NE(exc_type, nullptr);
+
+ OwnedRef exc_type_ref(exc_type);
+ OwnedRef exc_value_ref(exc_value);
+ OwnedRef traceback_ref(traceback);
+
+ std::string message;
+ RETURN_NOT_OK(internal::PyObject_StdStringStr(exc_value, &message));
+
+ if (code == StatusCode::UnknownError) {
+ // Try to match the Python exception type with an appropriate Status code
+ if (PyErr_GivenExceptionMatches(exc_type, PyExc_MemoryError)) {
+ code = StatusCode::OutOfMemory;
+ } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_KeyError)) {
+ code = StatusCode::KeyError;
+ } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_TypeError)) {
+ code = StatusCode::TypeError;
+ } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_ValueError)) {
+ code = StatusCode::Invalid;
+ } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_EnvironmentError)) {
+ code = StatusCode::IOError;
+ } else if (PyErr_GivenExceptionMatches(exc_type,
PyExc_NotImplementedError)) {
+ code = StatusCode::NotImplemented;
}
- return Status(code, message);
}
- return Status::OK();
+ return Status(code, message);
}
Status PassPyError() {
diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h
index a61d067..6eecd59 100644
--- a/cpp/src/arrow/python/common.h
+++ b/cpp/src/arrow/python/common.h
@@ -35,8 +35,17 @@ class MemoryPool;
namespace py {
-// TODO: inline the successful case
-ARROW_EXPORT Status CheckPyError(StatusCode code = StatusCode::UnknownError);
+ARROW_EXPORT Status ConvertPyError(StatusCode code = StatusCode::UnknownError);
+
+// Catch a pending Python exception and return the corresponding Status.
+// If no exception is pending, Status::OK() is returned.
+inline Status CheckPyError(StatusCode code = StatusCode::UnknownError) {
+ if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) {
+ return Status::OK();
+ } else {
+ return ConvertPyError(code);
+ }
+}
ARROW_EXPORT Status PassPyError();
diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
index b96d5ff..862157e 100644
--- a/cpp/src/arrow/python/helpers.cc
+++ b/cpp/src/arrow/python/helpers.cc
@@ -165,7 +165,7 @@ Status BuilderAppend(BinaryBuilder* builder, PyObject* obj,
bool* is_full) {
*is_full = true;
return Status::OK();
} else {
- return Status::Invalid("Maximum array size reached (2GB)");
+ return Status::CapacityError("Maximum array size reached (2GB)");
}
}
RETURN_NOT_OK(builder->Append(view.bytes, length));
@@ -194,7 +194,7 @@ Status BuilderAppend(FixedSizeBinaryBuilder* builder,
PyObject* obj, bool* is_fu
*is_full = true;
return Status::OK();
} else {
- return Status::Invalid("Maximum array size reached (2GB)");
+ return Status::CapacityError("Maximum array size reached (2GB)");
}
}
RETURN_NOT_OK(builder->Append(view.bytes));
@@ -216,7 +216,7 @@ Status BuilderAppend(StringBuilder* builder, PyObject* obj,
bool check_valid,
*is_full = true;
return Status::OK();
} else {
- return Status::Invalid("Maximum array size reached (2GB)");
+ return Status::CapacityError("Maximum array size reached (2GB)");
}
}
RETURN_NOT_OK(builder->Append(view.bytes, length));
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc
index 793c318..e160e90 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -1310,7 +1310,7 @@ inline Status
NumPyConverter::ConvertTypedLists<NPY_OBJECT, BinaryType>(
RETURN_NOT_OK(
AppendObjectBinaries(numpy_array, nullptr, 0, value_builder,
&offset));
if (offset < PyArray_SIZE(numpy_array)) {
- return Status::Invalid("Array cell value exceeded 2GB");
+ return Status::CapacityError("Array cell value exceeded 2GB");
}
return Status::OK();
} else if (PyList_Check(object)) {
@@ -1367,7 +1367,7 @@ inline Status
NumPyConverter::ConvertTypedLists<NPY_OBJECT, StringType>(
RETURN_NOT_OK(AppendObjectStrings(numpy_array, nullptr, 0, check_valid,
value_builder, &offset, &have_bytes));
if (offset < PyArray_SIZE(numpy_array)) {
- return Status::Invalid("Array cell value exceeded 2GB");
+ return Status::CapacityError("Array cell value exceeded 2GB");
}
return Status::OK();
} else if (PyList_Check(object)) {
@@ -1514,7 +1514,7 @@ Status AppendUTF32(const char* data, int itemsize, int
byteorder,
const int32_t length =
static_cast<int32_t>(PyBytes_GET_SIZE(utf8_obj.obj()));
if (builder->value_data_length() + length > kBinaryMemoryLimit) {
- return Status::Invalid("Encoded string length exceeds maximum size (2GB)");
+ return Status::CapacityError("Encoded string length exceeds maximum size
(2GB)");
}
return builder->Append(PyBytes_AS_STRING(utf8_obj.obj()), length);
}
diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h
index 8b82e2a..0e7f014 100644
--- a/cpp/src/arrow/status.h
+++ b/cpp/src/arrow/status.h
@@ -74,6 +74,7 @@ enum class StatusCode : char {
TypeError = 3,
Invalid = 4,
IOError = 5,
+ CapacityError = 6,
UnknownError = 9,
NotImplemented = 10,
SerializationError = 11,
@@ -129,6 +130,10 @@ class ARROW_EXPORT Status {
return Status(StatusCode::Invalid, msg);
}
+ static Status CapacityError(const std::string& msg) {
+ return Status(StatusCode::CapacityError, msg);
+ }
+
static Status IOError(const std::string& msg) {
return Status(StatusCode::IOError, msg);
}
@@ -160,6 +165,7 @@ class ARROW_EXPORT Status {
bool IsKeyError() const { return code() == StatusCode::KeyError; }
bool IsInvalid() const { return code() == StatusCode::Invalid; }
bool IsIOError() const { return code() == StatusCode::IOError; }
+ bool IsCapacityError() const { return code() == StatusCode::CapacityError; }
bool IsTypeError() const { return code() == StatusCode::TypeError; }
bool IsUnknownError() const { return code() == StatusCode::UnknownError; }
bool IsNotImplemented() const { return code() == StatusCode::NotImplemented;
}
diff --git a/python/pyarrow/error.pxi b/python/pyarrow/error.pxi
index dfdfcd7..abd5a18 100644
--- a/python/pyarrow/error.pxi
+++ b/python/pyarrow/error.pxi
@@ -48,6 +48,10 @@ class ArrowNotImplementedError(NotImplementedError,
ArrowException):
pass
+class ArrowCapacityError(ArrowException):
+ pass
+
+
class PlasmaObjectExists(ArrowException):
pass
@@ -85,6 +89,8 @@ cdef int check_status(const CStatus& status) nogil except -1:
raise ArrowNotImplementedError(message)
elif status.IsTypeError():
raise ArrowTypeError(message)
+ elif status.IsCapacityError():
+ raise ArrowCapacityError(message)
elif status.IsPlasmaObjectExists():
raise PlasmaObjectExists(message)
elif status.IsPlasmaObjectNonexistent():
diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd
index 16eae92..23617b6 100644
--- a/python/pyarrow/includes/common.pxd
+++ b/python/pyarrow/includes/common.pxd
@@ -56,6 +56,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
c_bool IsKeyError()
c_bool IsNotImplemented()
c_bool IsTypeError()
+ c_bool IsCapacityError()
c_bool IsSerializationError()
c_bool IsPythonError()
c_bool IsPlasmaObjectExists()
--
To stop receiving notification emails like this one, please contact
[email protected].