This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new a28a198  ARROW-2389: [C++] Add CapacityError
a28a198 is described below

commit a28a19829609f7e40123a87be9e7659cde54aa16
Author: Antoine Pitrou <[email protected]>
AuthorDate: Mon May 7 13:37:14 2018 +0200

    ARROW-2389: [C++] Add CapacityError
    
    This error signals an attempt to exceed the capacity of a buffer or
    container. I initially thought I'd call this `OverflowError` but
    `CapacityError` makes it clearer that it's not about overflow on
    arithmetic operations, IMHO.
    
    Author: Antoine Pitrou <[email protected]>
    
    Closes #1991 from pitrou/ARROW-2389-capacity-error and squashes the
    following commits:
    
    afc037d <Antoine Pitrou> ARROW-2389:  Add CapacityError
---
 cpp/src/arrow/builder.cc               |  7 ++--
 cpp/src/arrow/ipc/writer.cc            |  2 +-
 cpp/src/arrow/python/common.cc         | 65 +++++++++++++++++-----------------
 cpp/src/arrow/python/common.h          | 13 +++++--
 cpp/src/arrow/python/helpers.cc        |  6 ++--
 cpp/src/arrow/python/numpy_to_arrow.cc |  6 ++--
 cpp/src/arrow/status.h                 |  6 ++++
 python/pyarrow/error.pxi               |  6 ++++
 python/pyarrow/includes/common.pxd     |  1 +
 9 files changed, 67 insertions(+), 45 deletions(-)

diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 39363fd..6be35fe 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -1307,7 +1307,7 @@ Status ListBuilder::AppendNextOffset() {
     std::stringstream ss;
     ss << "ListArray cannot contain more then INT32_MAX - 1 child elements,"
        << " have " << num_values;
-    return Status::Invalid(ss.str());
+    return Status::CapacityError(ss.str());
   }
   return offsets_builder_.Append(static_cast<int32_t>(num_values));
 }
@@ -1386,7 +1386,8 @@ Status BinaryBuilder::Resize(int64_t capacity) {
 Status BinaryBuilder::ReserveData(int64_t elements) {
   if (value_data_length() + elements > value_data_capacity()) {
     if (value_data_length() + elements > kBinaryMemoryLimit) {
-      return Status::Invalid("Cannot reserve capacity larger than 2^31 - 1 for 
binary");
+      return Status::CapacityError(
+          "Cannot reserve capacity larger than 2^31 - 1 for binary");
     }
     RETURN_NOT_OK(value_data_builder_.Reserve(elements));
   }
@@ -1399,7 +1400,7 @@ Status BinaryBuilder::AppendNextOffset() {
     std::stringstream ss;
     ss << "BinaryArray cannot contain more than " << kBinaryMemoryLimit << " 
bytes, have "
        << num_bytes;
-    return Status::Invalid(ss.str());
+    return Status::CapacityError(ss.str());
   }
   return offsets_builder_.Append(static_cast<int32_t>(num_bytes));
 }
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index 84830e6..71a33d8 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -115,7 +115,7 @@ class RecordBatchSerializer : public ArrayVisitor {
     }
 
     if (!allow_64bit_ && arr.length() > std::numeric_limits<int32_t>::max()) {
-      return Status::Invalid("Cannot write arrays larger than 2^31 - 1 in 
length");
+      return Status::CapacityError("Cannot write arrays larger than 2^31 - 1 
in length");
     }
 
     // push back all common elements
diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc
index a565d00..7b400e0 100644
--- a/cpp/src/arrow/python/common.cc
+++ b/cpp/src/arrow/python/common.cc
@@ -83,41 +83,40 @@ PyBuffer::~PyBuffer() {
 // ----------------------------------------------------------------------
 // Python exception -> Status
 
-Status CheckPyError(StatusCode code) {
-  if (PyErr_Occurred()) {
-    PyObject* exc_type = nullptr;
-    PyObject* exc_value = nullptr;
-    PyObject* traceback = nullptr;
-
-    PyErr_Fetch(&exc_type, &exc_value, &traceback);
-    PyErr_NormalizeException(&exc_type, &exc_value, &traceback);
-
-    OwnedRef exc_type_ref(exc_type);
-    OwnedRef exc_value_ref(exc_value);
-    OwnedRef traceback_ref(traceback);
-
-    std::string message;
-    RETURN_NOT_OK(internal::PyObject_StdStringStr(exc_value, &message));
-
-    if (code == StatusCode::UnknownError) {
-      // Try to match the Python exception type with an appropriate Status code
-      if (PyErr_GivenExceptionMatches(exc_type, PyExc_MemoryError)) {
-        code = StatusCode::OutOfMemory;
-      } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_KeyError)) {
-        code = StatusCode::KeyError;
-      } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_TypeError)) {
-        code = StatusCode::TypeError;
-      } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_ValueError)) {
-        code = StatusCode::Invalid;
-      } else if (PyErr_GivenExceptionMatches(exc_type, 
PyExc_EnvironmentError)) {
-        code = StatusCode::IOError;
-      } else if (PyErr_GivenExceptionMatches(exc_type, 
PyExc_NotImplementedError)) {
-        code = StatusCode::NotImplemented;
-      }
+Status ConvertPyError(StatusCode code) {
+  PyObject* exc_type = nullptr;
+  PyObject* exc_value = nullptr;
+  PyObject* traceback = nullptr;
+
+  PyErr_Fetch(&exc_type, &exc_value, &traceback);
+  PyErr_NormalizeException(&exc_type, &exc_value, &traceback);
+
+  DCHECK_NE(exc_type, nullptr);
+
+  OwnedRef exc_type_ref(exc_type);
+  OwnedRef exc_value_ref(exc_value);
+  OwnedRef traceback_ref(traceback);
+
+  std::string message;
+  RETURN_NOT_OK(internal::PyObject_StdStringStr(exc_value, &message));
+
+  if (code == StatusCode::UnknownError) {
+    // Try to match the Python exception type with an appropriate Status code
+    if (PyErr_GivenExceptionMatches(exc_type, PyExc_MemoryError)) {
+      code = StatusCode::OutOfMemory;
+    } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_KeyError)) {
+      code = StatusCode::KeyError;
+    } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_TypeError)) {
+      code = StatusCode::TypeError;
+    } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_ValueError)) {
+      code = StatusCode::Invalid;
+    } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_EnvironmentError)) {
+      code = StatusCode::IOError;
+    } else if (PyErr_GivenExceptionMatches(exc_type, 
PyExc_NotImplementedError)) {
+      code = StatusCode::NotImplemented;
     }
-    return Status(code, message);
   }
-  return Status::OK();
+  return Status(code, message);
 }
 
 Status PassPyError() {
diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h
index a61d067..6eecd59 100644
--- a/cpp/src/arrow/python/common.h
+++ b/cpp/src/arrow/python/common.h
@@ -35,8 +35,17 @@ class MemoryPool;
 
 namespace py {
 
-// TODO: inline the successful case
-ARROW_EXPORT Status CheckPyError(StatusCode code = StatusCode::UnknownError);
+ARROW_EXPORT Status ConvertPyError(StatusCode code = StatusCode::UnknownError);
+
+// Catch a pending Python exception and return the corresponding Status.
+// If no exception is pending, Status::OK() is returned.
+inline Status CheckPyError(StatusCode code = StatusCode::UnknownError) {
+  if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) {
+    return Status::OK();
+  } else {
+    return ConvertPyError(code);
+  }
+}
 
 ARROW_EXPORT Status PassPyError();
 
diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
index b96d5ff..862157e 100644
--- a/cpp/src/arrow/python/helpers.cc
+++ b/cpp/src/arrow/python/helpers.cc
@@ -165,7 +165,7 @@ Status BuilderAppend(BinaryBuilder* builder, PyObject* obj, 
bool* is_full) {
       *is_full = true;
       return Status::OK();
     } else {
-      return Status::Invalid("Maximum array size reached (2GB)");
+      return Status::CapacityError("Maximum array size reached (2GB)");
     }
   }
   RETURN_NOT_OK(builder->Append(view.bytes, length));
@@ -194,7 +194,7 @@ Status BuilderAppend(FixedSizeBinaryBuilder* builder, 
PyObject* obj, bool* is_fu
       *is_full = true;
       return Status::OK();
     } else {
-      return Status::Invalid("Maximum array size reached (2GB)");
+      return Status::CapacityError("Maximum array size reached (2GB)");
     }
   }
   RETURN_NOT_OK(builder->Append(view.bytes));
@@ -216,7 +216,7 @@ Status BuilderAppend(StringBuilder* builder, PyObject* obj, 
bool check_valid,
       *is_full = true;
       return Status::OK();
     } else {
-      return Status::Invalid("Maximum array size reached (2GB)");
+      return Status::CapacityError("Maximum array size reached (2GB)");
     }
   }
   RETURN_NOT_OK(builder->Append(view.bytes, length));
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc 
b/cpp/src/arrow/python/numpy_to_arrow.cc
index 793c318..e160e90 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -1310,7 +1310,7 @@ inline Status 
NumPyConverter::ConvertTypedLists<NPY_OBJECT, BinaryType>(
       RETURN_NOT_OK(
           AppendObjectBinaries(numpy_array, nullptr, 0, value_builder, 
&offset));
       if (offset < PyArray_SIZE(numpy_array)) {
-        return Status::Invalid("Array cell value exceeded 2GB");
+        return Status::CapacityError("Array cell value exceeded 2GB");
       }
       return Status::OK();
     } else if (PyList_Check(object)) {
@@ -1367,7 +1367,7 @@ inline Status 
NumPyConverter::ConvertTypedLists<NPY_OBJECT, StringType>(
       RETURN_NOT_OK(AppendObjectStrings(numpy_array, nullptr, 0, check_valid,
                                         value_builder, &offset, &have_bytes));
       if (offset < PyArray_SIZE(numpy_array)) {
-        return Status::Invalid("Array cell value exceeded 2GB");
+        return Status::CapacityError("Array cell value exceeded 2GB");
       }
       return Status::OK();
     } else if (PyList_Check(object)) {
@@ -1514,7 +1514,7 @@ Status AppendUTF32(const char* data, int itemsize, int 
byteorder,
 
   const int32_t length = 
static_cast<int32_t>(PyBytes_GET_SIZE(utf8_obj.obj()));
   if (builder->value_data_length() + length > kBinaryMemoryLimit) {
-    return Status::Invalid("Encoded string length exceeds maximum size (2GB)");
+    return Status::CapacityError("Encoded string length exceeds maximum size 
(2GB)");
   }
   return builder->Append(PyBytes_AS_STRING(utf8_obj.obj()), length);
 }
diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h
index 8b82e2a..0e7f014 100644
--- a/cpp/src/arrow/status.h
+++ b/cpp/src/arrow/status.h
@@ -74,6 +74,7 @@ enum class StatusCode : char {
   TypeError = 3,
   Invalid = 4,
   IOError = 5,
+  CapacityError = 6,
   UnknownError = 9,
   NotImplemented = 10,
   SerializationError = 11,
@@ -129,6 +130,10 @@ class ARROW_EXPORT Status {
     return Status(StatusCode::Invalid, msg);
   }
 
+  static Status CapacityError(const std::string& msg) {
+    return Status(StatusCode::CapacityError, msg);
+  }
+
   static Status IOError(const std::string& msg) {
     return Status(StatusCode::IOError, msg);
   }
@@ -160,6 +165,7 @@ class ARROW_EXPORT Status {
   bool IsKeyError() const { return code() == StatusCode::KeyError; }
   bool IsInvalid() const { return code() == StatusCode::Invalid; }
   bool IsIOError() const { return code() == StatusCode::IOError; }
+  bool IsCapacityError() const { return code() == StatusCode::CapacityError; }
   bool IsTypeError() const { return code() == StatusCode::TypeError; }
   bool IsUnknownError() const { return code() == StatusCode::UnknownError; }
   bool IsNotImplemented() const { return code() == StatusCode::NotImplemented; 
}
diff --git a/python/pyarrow/error.pxi b/python/pyarrow/error.pxi
index dfdfcd7..abd5a18 100644
--- a/python/pyarrow/error.pxi
+++ b/python/pyarrow/error.pxi
@@ -48,6 +48,10 @@ class ArrowNotImplementedError(NotImplementedError, 
ArrowException):
     pass
 
 
+class ArrowCapacityError(ArrowException):
+    pass
+
+
 class PlasmaObjectExists(ArrowException):
     pass
 
@@ -85,6 +89,8 @@ cdef int check_status(const CStatus& status) nogil except -1:
             raise ArrowNotImplementedError(message)
         elif status.IsTypeError():
             raise ArrowTypeError(message)
+        elif status.IsCapacityError():
+            raise ArrowCapacityError(message)
         elif status.IsPlasmaObjectExists():
             raise PlasmaObjectExists(message)
         elif status.IsPlasmaObjectNonexistent():
diff --git a/python/pyarrow/includes/common.pxd 
b/python/pyarrow/includes/common.pxd
index 16eae92..23617b6 100644
--- a/python/pyarrow/includes/common.pxd
+++ b/python/pyarrow/includes/common.pxd
@@ -56,6 +56,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         c_bool IsKeyError()
         c_bool IsNotImplemented()
         c_bool IsTypeError()
+        c_bool IsCapacityError()
         c_bool IsSerializationError()
         c_bool IsPythonError()
         c_bool IsPlasmaObjectExists()

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to