[ 
https://issues.apache.org/jira/browse/ARROW-2052?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16349798#comment-16349798
 ] 

ASF GitHub Bot commented on ARROW-2052:
---------------------------------------

wesm closed pull request #1534: ARROW-2052: [C++ / Python] Rework OwnedRef, 
remove ScopedRef
URL: https://github.com/apache/arrow/pull/1534
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc 
b/cpp/src/arrow/python/arrow_to_pandas.cc
index 5c8c970e1..fcf05f833 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -265,13 +265,13 @@ class PandasBlock {
   int64_t num_rows_;
   int num_columns_;
 
-  OwnedRef block_arr_;
+  OwnedRefNoGIL block_arr_;
   uint8_t* block_data_;
 
   PandasOptions options_;
 
   // ndarray<int32>
-  OwnedRef placement_arr_;
+  OwnedRefNoGIL placement_arr_;
   int64_t* placement_data_;
 
  private:
@@ -1140,7 +1140,7 @@ class CategoricalBlock : public PandasBlock {
   }
 
   MemoryPool* pool_;
-  OwnedRef dictionary_;
+  OwnedRefNoGIL dictionary_;
   bool ordered_;
   bool needs_copy_;
 };
diff --git a/cpp/src/arrow/python/arrow_to_python.cc 
b/cpp/src/arrow/python/arrow_to_python.cc
index c67e5410e..54a71d5a3 100644
--- a/cpp/src/arrow/python/arrow_to_python.cc
+++ b/cpp/src/arrow/python/arrow_to_python.cc
@@ -64,8 +64,8 @@ Status DeserializeDict(PyObject* context, const Array& array, 
int64_t start_idx,
                        int64_t stop_idx, PyObject* base, const 
SerializedPyObject& blobs,
                        PyObject** out) {
   const auto& data = static_cast<const StructArray&>(array);
-  ScopedRef keys, vals;
-  ScopedRef result(PyDict_New());
+  OwnedRef keys, vals;
+  OwnedRef result(PyDict_New());
   RETURN_IF_PYERROR();
 
   DCHECK_EQ(2, data.num_fields());
@@ -77,16 +77,16 @@ Status DeserializeDict(PyObject* context, const Array& 
array, int64_t start_idx,
   for (int64_t i = start_idx; i < stop_idx; ++i) {
     // PyDict_SetItem behaves differently from PyList_SetItem and 
PyTuple_SetItem.
     // The latter two steal references whereas PyDict_SetItem does not. So we 
need
-    // to make sure the reference count is decremented by letting the ScopedRef
+    // to make sure the reference count is decremented by letting the OwnedRef
     // go out of scope at the end.
-    PyDict_SetItem(result.get(), PyList_GET_ITEM(keys.get(), i - start_idx),
-                   PyList_GET_ITEM(vals.get(), i - start_idx));
+    PyDict_SetItem(result.obj(), PyList_GET_ITEM(keys.obj(), i - start_idx),
+                   PyList_GET_ITEM(vals.obj(), i - start_idx));
   }
   static PyObject* py_type = PyUnicode_FromString("_pytype_");
-  if (PyDict_Contains(result.get(), py_type)) {
-    RETURN_NOT_OK(CallDeserializeCallback(context, result.get(), out));
+  if (PyDict_Contains(result.obj(), py_type)) {
+    RETURN_NOT_OK(CallDeserializeCallback(context, result.obj(), out));
   } else {
-    *out = result.release();
+    *out = result.detach();
   }
   return Status::OK();
 }
@@ -96,10 +96,10 @@ Status DeserializeArray(const Array& array, int64_t offset, 
PyObject* base,
   int32_t index = static_cast<const Int32Array&>(array).Value(offset);
   RETURN_NOT_OK(py::TensorToNdarray(*blobs.tensors[index], base, out));
   // Mark the array as immutable
-  ScopedRef flags(PyObject_GetAttrString(*out, "flags"));
-  DCHECK(flags.get() != NULL) << "Could not mark Numpy array immutable";
+  OwnedRef flags(PyObject_GetAttrString(*out, "flags"));
+  DCHECK(flags.obj() != NULL) << "Could not mark Numpy array immutable";
   Py_INCREF(Py_False);
-  int flag_set = PyObject_SetAttrString(flags.get(), "writeable", Py_False);
+  int flag_set = PyObject_SetAttrString(flags.obj(), "writeable", Py_False);
   DCHECK(flag_set == 0) << "Could not mark Numpy array immutable";
   return Status::OK();
 }
@@ -184,23 +184,23 @@ Status GetValue(PyObject* context, const UnionArray& 
parent, const Array& arr,
 
 #define DESERIALIZE_SEQUENCE(CREATE_FN, SET_ITEM_FN)                           
          \
   const auto& data = static_cast<const UnionArray&>(array);                    
          \
-  ScopedRef result(CREATE_FN(stop_idx - start_idx));                           
          \
+  OwnedRef result(CREATE_FN(stop_idx - start_idx));                            
          \
   const uint8_t* type_ids = data.raw_type_ids();                               
          \
   const int32_t* value_offsets = data.raw_value_offsets();                     
          \
   for (int64_t i = start_idx; i < stop_idx; ++i) {                             
          \
     if (data.IsNull(i)) {                                                      
          \
       Py_INCREF(Py_None);                                                      
          \
-      SET_ITEM_FN(result.get(), i - start_idx, Py_None);                       
          \
+      SET_ITEM_FN(result.obj(), i - start_idx, Py_None);                       
          \
     } else {                                                                   
          \
       int64_t offset = value_offsets[i];                                       
          \
       uint8_t type = type_ids[i];                                              
          \
       PyObject* value;                                                         
          \
       RETURN_NOT_OK(GetValue(context, data, *data.UnsafeChild(type), offset, 
type, base, \
                              blobs, &value));                                  
          \
-      SET_ITEM_FN(result.get(), i - start_idx, value);                         
          \
+      SET_ITEM_FN(result.obj(), i - start_idx, value);                         
          \
     }                                                                          
          \
   }                                                                            
          \
-  *out = result.release();                                                     
          \
+  *out = result.detach();                                                      
          \
   return Status::OK()
 
 Status DeserializeList(PyObject* context, const Array& array, int64_t 
start_idx,
@@ -219,13 +219,13 @@ Status DeserializeSet(PyObject* context, const Array& 
array, int64_t start_idx,
                       int64_t stop_idx, PyObject* base, const 
SerializedPyObject& blobs,
                       PyObject** out) {
   const auto& data = static_cast<const UnionArray&>(array);
-  ScopedRef result(PySet_New(nullptr));
+  OwnedRef result(PySet_New(nullptr));
   const uint8_t* type_ids = data.raw_type_ids();
   const int32_t* value_offsets = data.raw_value_offsets();
   for (int64_t i = start_idx; i < stop_idx; ++i) {
     if (data.IsNull(i)) {
       Py_INCREF(Py_None);
-      if (PySet_Add(result.get(), Py_None) < 0) {
+      if (PySet_Add(result.obj(), Py_None) < 0) {
         RETURN_IF_PYERROR();
       }
     } else {
@@ -234,12 +234,12 @@ Status DeserializeSet(PyObject* context, const Array& 
array, int64_t start_idx,
       PyObject* value;
       RETURN_NOT_OK(GetValue(context, data, *data.UnsafeChild(type), offset, 
type, base,
                              blobs, &value));
-      if (PySet_Add(result.get(), value) < 0) {
+      if (PySet_Add(result.obj(), value) < 0) {
         RETURN_IF_PYERROR();
       }
     }
   }
-  *out = result.release();
+  *out = result.detach();
   return Status::OK();
 }
 
diff --git a/cpp/src/arrow/python/builtin_convert.cc 
b/cpp/src/arrow/python/builtin_convert.cc
index 63d388925..d84b8a48c 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -830,7 +830,7 @@ static Status ConvertPySequenceReal(PyObject* obj, int64_t 
size,
   PyAcquireGIL lock;
 
   PyObject* seq;
-  ScopedRef tmp_seq_nanny;
+  OwnedRef tmp_seq_nanny;
 
   std::shared_ptr<DataType> real_type;
 
diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h
index 146864ffd..b1e0888af 100644
--- a/cpp/src/arrow/python/common.h
+++ b/cpp/src/arrow/python/common.h
@@ -61,59 +61,30 @@ class ARROW_EXPORT PyAcquireGIL {
 
 #define PYARROW_IS_PY2 PY_MAJOR_VERSION <= 2
 
+// A RAII primitive that DECREFs the underlying PyObject* when it
+// goes out of scope.
 class ARROW_EXPORT OwnedRef {
  public:
   OwnedRef() : obj_(NULLPTR) {}
 
   explicit OwnedRef(PyObject* obj) : obj_(obj) {}
 
-  ~OwnedRef() {
-    PyAcquireGIL lock;
-    release();
-  }
+  ~OwnedRef() { reset(); }
 
   void reset(PyObject* obj) {
-    /// TODO(phillipc): Should we acquire the GIL here? It definitely needs to 
be
-    /// acquired,
-    /// but callers have probably already acquired it
     Py_XDECREF(obj_);
     obj_ = obj;
   }
 
-  void release() {
-    Py_XDECREF(obj_);
-    obj_ = NULLPTR;
-  }
-
-  PyObject* obj() const { return obj_; }
-
- private:
-  PyObject* obj_;
-};
-
-// This is different from OwnedRef in that it assumes that
-// the GIL is held by the caller and doesn't decrement the
-// reference count when release is called.
-class ARROW_EXPORT ScopedRef {
- public:
-  ScopedRef() : obj_(NULLPTR) {}
+  void reset() { reset(NULLPTR); }
 
-  explicit ScopedRef(PyObject* obj) : obj_(obj) {}
-
-  ~ScopedRef() { Py_XDECREF(obj_); }
-
-  void reset(PyObject* obj) {
-    Py_XDECREF(obj_);
-    obj_ = obj;
-  }
-
-  PyObject* release() {
+  PyObject* detach() {
     PyObject* result = obj_;
     obj_ = NULLPTR;
     return result;
   }
 
-  PyObject* get() const { return obj_; }
+  PyObject* obj() const { return obj_; }
 
   PyObject** ref() { return &obj_; }
 
@@ -121,6 +92,17 @@ class ARROW_EXPORT ScopedRef {
   PyObject* obj_;
 };
 
+// Same as OwnedRef, but ensures the GIL is taken when it goes out of scope.
+// This is for situations where the GIL is not always known to be held
+// (e.g. if it is released in the middle of a function for performance reasons)
+class ARROW_EXPORT OwnedRefNoGIL : public OwnedRef {
+ public:
+  ~OwnedRefNoGIL() {
+    PyAcquireGIL lock;
+    reset();
+  }
+};
+
 struct ARROW_EXPORT PyObjectStringify {
   OwnedRef tmp_obj;
   const char* bytes;
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc 
b/cpp/src/arrow/python/numpy_to_arrow.cc
index a1161fe32..1b1673bb8 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -1116,10 +1116,10 @@ Status LoopPySequence(PyObject* sequence, T func) {
       }
     }
   } else if (PyObject_HasAttrString(sequence, "__iter__")) {
-    OwnedRef iter = OwnedRef(PyObject_GetIter(sequence));
+    OwnedRef iter(PyObject_GetIter(sequence));
     PyObject* item;
     while ((item = PyIter_Next(iter.obj()))) {
-      OwnedRef ref = OwnedRef(item);
+      OwnedRef ref(item);
       RETURN_NOT_OK(func(ref.obj()));
     }
   } else {
@@ -1149,11 +1149,11 @@ Status LoopPySequenceWithMasks(PyObject* sequence,
       }
     }
   } else if (PyObject_HasAttrString(sequence, "__iter__")) {
-    OwnedRef iter = OwnedRef(PyObject_GetIter(sequence));
+    OwnedRef iter(PyObject_GetIter(sequence));
     PyObject* item;
     int64_t i = 0;
     while ((item = PyIter_Next(iter.obj()))) {
-      OwnedRef ref = OwnedRef(item);
+      OwnedRef ref(item);
       RETURN_NOT_OK(func(ref.obj(), have_mask && mask_values[i]));
       i++;
     }
@@ -1476,20 +1476,20 @@ Status AppendUTF32(const char* data, int itemsize, int 
byteorder,
     }
   }
 
-  ScopedRef unicode_obj(PyUnicode_DecodeUTF32(data, actual_length * 
kNumPyUnicodeSize,
-                                              nullptr, &byteorder));
+  OwnedRef unicode_obj(PyUnicode_DecodeUTF32(data, actual_length * 
kNumPyUnicodeSize,
+                                             nullptr, &byteorder));
   RETURN_IF_PYERROR();
-  ScopedRef utf8_obj(PyUnicode_AsUTF8String(unicode_obj.get()));
-  if (utf8_obj.get() == NULL) {
+  OwnedRef utf8_obj(PyUnicode_AsUTF8String(unicode_obj.obj()));
+  if (utf8_obj.obj() == NULL) {
     PyErr_Clear();
     return Status::Invalid("failed converting UTF32 to UTF8");
   }
 
-  const int32_t length = 
static_cast<int32_t>(PyBytes_GET_SIZE(utf8_obj.get()));
+  const int32_t length = 
static_cast<int32_t>(PyBytes_GET_SIZE(utf8_obj.obj()));
   if (builder->value_data_length() + length > kBinaryMemoryLimit) {
     return Status::Invalid("Encoded string length exceeds maximum size (2GB)");
   }
-  return builder->Append(PyBytes_AS_STRING(utf8_obj.get()), length);
+  return builder->Append(PyBytes_AS_STRING(utf8_obj.obj()), length);
 }
 
 }  // namespace
diff --git a/cpp/src/arrow/python/python_to_arrow.cc 
b/cpp/src/arrow/python/python_to_arrow.cc
index 253e9d9a7..6d4f64675 100644
--- a/cpp/src/arrow/python/python_to_arrow.cc
+++ b/cpp/src/arrow/python/python_to_arrow.cc
@@ -365,15 +365,15 @@ Status CallCustomCallback(PyObject* context, PyObject* 
method_name, PyObject* el
   *result = NULL;
   if (context == Py_None) {
     std::stringstream ss;
-    ScopedRef repr(PyObject_Repr(elem));
+    OwnedRef repr(PyObject_Repr(elem));
     RETURN_IF_PYERROR();
 #if PY_MAJOR_VERSION >= 3
-    ScopedRef ascii(PyUnicode_AsASCIIString(repr.get()));
+    OwnedRef ascii(PyUnicode_AsASCIIString(repr.obj()));
     RETURN_IF_PYERROR();
-    ss << "error while calling callback on " << PyBytes_AsString(ascii.get())
+    ss << "error while calling callback on " << PyBytes_AsString(ascii.obj())
        << ": handler not registered";
 #else
-    ss << "error while calling callback on " << PyString_AsString(repr.get())
+    ss << "error while calling callback on " << PyString_AsString(repr.obj())
        << ": handler not registered";
 #endif
     return Status::SerializationError(ss.str());
@@ -386,8 +386,8 @@ Status CallCustomCallback(PyObject* context, PyObject* 
method_name, PyObject* el
 
 Status CallSerializeCallback(PyObject* context, PyObject* value,
                              PyObject** serialized_object) {
-  ScopedRef method_name(PyUnicode_FromString("_serialize_callback"));
-  RETURN_NOT_OK(CallCustomCallback(context, method_name.get(), value, 
serialized_object));
+  OwnedRef method_name(PyUnicode_FromString("_serialize_callback"));
+  RETURN_NOT_OK(CallCustomCallback(context, method_name.obj(), value, 
serialized_object));
   if (!PyDict_Check(*serialized_object)) {
     return Status::TypeError("serialization callback must return a valid 
dictionary");
   }
@@ -396,8 +396,8 @@ Status CallSerializeCallback(PyObject* context, PyObject* 
value,
 
 Status CallDeserializeCallback(PyObject* context, PyObject* value,
                                PyObject** deserialized_object) {
-  ScopedRef method_name(PyUnicode_FromString("_deserialize_callback"));
-  return CallCustomCallback(context, method_name.get(), value, 
deserialized_object);
+  OwnedRef method_name(PyUnicode_FromString("_deserialize_callback"));
+  return CallCustomCallback(context, method_name.obj(), value, 
deserialized_object);
 }
 
 Status SerializeDict(PyObject* context, std::vector<PyObject*> dicts,
@@ -493,9 +493,9 @@ Status Append(PyObject* context, PyObject* elem, 
SequenceBuilder* builder,
 #if PY_MAJOR_VERSION >= 3
     char* data = PyUnicode_AsUTF8AndSize(elem, &size);
 #else
-    ScopedRef str(PyUnicode_AsUTF8String(elem));
-    char* data = PyString_AS_STRING(str.get());
-    size = PyString_GET_SIZE(str.get());
+    OwnedRef str(PyUnicode_AsUTF8String(elem));
+    char* data = PyString_AS_STRING(str.obj());
+    size = PyString_GET_SIZE(str.obj());
 #endif
     if (size > std::numeric_limits<int32_t>::max()) {
       return Status::Invalid("Cannot writes bytes over 2GB");
@@ -585,15 +585,15 @@ Status SerializeSequences(PyObject* context, 
std::vector<PyObject*> sequences,
   SequenceBuilder builder(nullptr);
   std::vector<PyObject*> sublists, subtuples, subdicts, subsets;
   for (const auto& sequence : sequences) {
-    ScopedRef iterator(PyObject_GetIter(sequence));
+    OwnedRef iterator(PyObject_GetIter(sequence));
     RETURN_IF_PYERROR();
-    ScopedRef item;
+    OwnedRef item;
     while (true) {
-      item.reset(PyIter_Next(iterator.get()));
-      if (!item.get()) {
+      item.reset(PyIter_Next(iterator.obj()));
+      if (!item.obj()) {
         break;
       }
-      RETURN_NOT_OK(Append(context, item.get(), &builder, &sublists, 
&subtuples,
+      RETURN_NOT_OK(Append(context, item.obj(), &builder, &sublists, 
&subtuples,
                            &subdicts, &subsets, blobs_out));
     }
   }
@@ -739,18 +739,18 @@ Status SerializedPyObject::WriteTo(io::OutputStream* dst) 
{
 Status SerializedPyObject::GetComponents(MemoryPool* memory_pool, PyObject** 
out) {
   PyAcquireGIL py_gil;
 
-  ScopedRef result(PyDict_New());
+  OwnedRef result(PyDict_New());
   PyObject* buffers = PyList_New(0);
 
   // TODO(wesm): Not sure how pedantic we need to be about checking the return
   // values of these functions. There are other places where we do not check
   // PyDict_SetItem/SetItemString return value, but these failures would be
   // quite esoteric
-  PyDict_SetItemString(result.get(), "num_tensors",
+  PyDict_SetItemString(result.obj(), "num_tensors",
                        PyLong_FromSize_t(this->tensors.size()));
-  PyDict_SetItemString(result.get(), "num_buffers",
+  PyDict_SetItemString(result.obj(), "num_buffers",
                        PyLong_FromSize_t(this->buffers.size()));
-  PyDict_SetItemString(result.get(), "data", buffers);
+  PyDict_SetItemString(result.obj(), "data", buffers);
   RETURN_IF_PYERROR();
 
   Py_DECREF(buffers);
@@ -792,7 +792,7 @@ Status SerializedPyObject::GetComponents(MemoryPool* 
memory_pool, PyObject** out
     RETURN_NOT_OK(PushBuffer(buf));
   }
 
-  *out = result.release();
+  *out = result.detach();
   return Status::OK();
 }
 


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Unify OwnedRef and ScopedRef
> ----------------------------
>
>                 Key: ARROW-2052
>                 URL: https://issues.apache.org/jira/browse/ARROW-2052
>             Project: Apache Arrow
>          Issue Type: Task
>          Components: Python
>    Affects Versions: 0.8.0
>            Reporter: Antoine Pitrou
>            Assignee: Antoine Pitrou
>            Priority: Trivial
>              Labels: pull-request-available
>             Fix For: 0.9.0
>
>
> Currently {{OwnedRef}} and {{ScopedRef}} have similar semantics with small 
> differences. Furtheremore, the naming distinction isn't obvious.
> I propose to unify them as a single {{OwnedRef}} class with the following 
> characteristics:
> - doesn't take the GIL automatically
> - has a {{release()}} method that decrefs the pointer (and sets the internal 
> copy to NULL) before returning it
> - has a {{detach()}} method that returns the pointer (and sets the internal 
> copy to NULL) without decrefing it
> For the rare situations where an {{OwnedRef}} may be destroyed with the GIL 
> released, a {{OwnedRefNoGIL}} derived class would also be proposed (the 
> naming scheme follows Cython here).
> Opinions / comments?



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to