seberg commented on code in PR #48391:
URL: https://github.com/apache/arrow/pull/48391#discussion_r2671805458
##########
python/pyarrow/src/arrow/python/numpy_to_arrow.cc:
##########
@@ -815,6 +856,110 @@ Status NumPyConverter::Visit(const StringViewType& type) {
return Status::OK();
}
+#if NPY_ABI_VERSION >= 0x02000000
+template <typename Builder>
+Status NumPyConverter::AppendStringDTypeValues(Builder* builder) {
+ auto* descr = reinterpret_cast<PyArray_StringDTypeObject*>(dtype_);
+
+ npy_string_allocator* allocator = ArrowNpyString_acquire_allocator(descr);
+ if (allocator == nullptr) {
+ return Status::Invalid("Failed to acquire NumPy StringDType allocator");
+ }
+
+ struct AllocatorGuard {
+ npy_string_allocator* ptr;
+ explicit AllocatorGuard(npy_string_allocator* p) : ptr(p) {}
+ ~AllocatorGuard() {
+ if (ptr != nullptr) {
+ ArrowNpyString_release_allocator(ptr);
+ }
+ }
+ } guard(allocator);
+
+ npy_static_string value = {0, nullptr};
+ char* data = PyArray_BYTES(arr_);
+
+ if (mask_ != nullptr) {
+ Ndarray1DIndexer<uint8_t> mask_values(mask_);
+ for (int64_t i = 0; i < length_; ++i) {
+ if (mask_values[i]) {
+ RETURN_NOT_OK(builder->AppendNull());
+ continue;
+ }
+
+ const auto* packed =
+ reinterpret_cast<const npy_packed_static_string*>(data + i *
stride_);
+ const int is_null = ArrowNpyString_load(allocator, packed, &value);
Review Comment:
NEP 55 covers this in its section on memory layout and managing heap allocations:
https://numpy.org/neps/nep-0055-string_dtype.html#memory-layout-and-managing-heap-allocations
But basically, the string can't be stored in the array data itself, because
NumPy requires a fixed size per element.
So instead, the string (unless it is very small) is stored in a separately
allocated buffer (much like in Arrow, except it's more annoying because NumPy
arrays are N-D).
That buffer is owned by the `dtype`, which is why you fetch the
`allocator` from the dtype (i.e. the `descr`).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]