This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 6e61c5e216 GH-39096: [Python] Release GIL in `.nbytes` (#39097)
6e61c5e216 is described below
commit 6e61c5e2163c8509411143752afc7f3bb37184cb
Author: Hendrik Makait <[email protected]>
AuthorDate: Thu Dec 7 14:18:06 2023 +0100
GH-39096: [Python] Release GIL in `.nbytes` (#39097)
### Rationale for this change
Accessing `.nbytes` holds the GIL while computing the data size in C++, which has
caused performance issues in Dask because threads were blocking each other.
See #39096.
### Are these changes tested?
I am not sure if additional tests are necessary here. If so, I'm happy to
add them but would welcome some pointers.
### Are there any user-facing changes?
No
* Closes: #39096
Authored-by: Hendrik Makait <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
---
python/pyarrow/array.pxi | 5 +++--
python/pyarrow/table.pxi | 15 +++++++++------
2 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 9d62bed51f..789e30d3e9 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1206,8 +1206,9 @@ cdef class Array(_PandasConvertible):
cdef:
CResult[int64_t] c_size_res
- c_size_res = ReferencedBufferSize(deref(self.ap))
- size = GetResultValue(c_size_res)
+ with nogil:
+ c_size_res = ReferencedBufferSize(deref(self.ap))
+ size = GetResultValue(c_size_res)
return size
def get_total_buffer_size(self):
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index f93f595090..2f8d1abd1f 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -248,8 +248,9 @@ cdef class ChunkedArray(_PandasConvertible):
cdef:
CResult[int64_t] c_res_buffer
- c_res_buffer = ReferencedBufferSize(deref(self.chunked_array))
- size = GetResultValue(c_res_buffer)
+ with nogil:
+ c_res_buffer = ReferencedBufferSize(deref(self.chunked_array))
+ size = GetResultValue(c_res_buffer)
return size
def get_total_buffer_size(self):
@@ -2386,8 +2387,9 @@ cdef class RecordBatch(_Tabular):
cdef:
CResult[int64_t] c_res_buffer
- c_res_buffer = ReferencedBufferSize(deref(self.batch))
- size = GetResultValue(c_res_buffer)
+ with nogil:
+ c_res_buffer = ReferencedBufferSize(deref(self.batch))
+ size = GetResultValue(c_res_buffer)
return size
def get_total_buffer_size(self):
@@ -4337,8 +4339,9 @@ cdef class Table(_Tabular):
cdef:
CResult[int64_t] c_res_buffer
- c_res_buffer = ReferencedBufferSize(deref(self.table))
- size = GetResultValue(c_res_buffer)
+ with nogil:
+ c_res_buffer = ReferencedBufferSize(deref(self.table))
+ size = GetResultValue(c_res_buffer)
return size
def get_total_buffer_size(self):