seberg commented on code in PR #48391:
URL: https://github.com/apache/arrow/pull/48391#discussion_r2671841794
##########
python/pyarrow/src/arrow/python/numpy_to_arrow.cc:
##########
@@ -815,6 +856,110 @@ Status NumPyConverter::Visit(const StringViewType& type) {
return Status::OK();
}
+#if NPY_ABI_VERSION >= 0x02000000
+template <typename Builder>
+Status NumPyConverter::AppendStringDTypeValues(Builder* builder) {
+ auto* descr = reinterpret_cast<PyArray_StringDTypeObject*>(dtype_);
+
+ npy_string_allocator* allocator = ArrowNpyString_acquire_allocator(descr);
+ if (allocator == nullptr) {
+ return Status::Invalid("Failed to acquire NumPy StringDType allocator");
+ }
+
+ struct AllocatorGuard {
+ npy_string_allocator* ptr;
+ explicit AllocatorGuard(npy_string_allocator* p) : ptr(p) {}
+ ~AllocatorGuard() {
+ if (ptr != nullptr) {
+ ArrowNpyString_release_allocator(ptr);
+ }
+ }
+ } guard(allocator);
+
+ npy_static_string value = {0, nullptr};
+ char* data = PyArray_BYTES(arr_);
+
+ if (mask_ != nullptr) {
+ Ndarray1DIndexer<uint8_t> mask_values(mask_);
+ for (int64_t i = 0; i < length_; ++i) {
+ if (mask_values[i]) {
+ RETURN_NOT_OK(builder->AppendNull());
+ continue;
+ }
+
+ const auto* packed =
+ reinterpret_cast<const npy_packed_static_string*>(data + i * stride_);
+ const int is_null = ArrowNpyString_load(allocator, packed, &value);
Review Comment:
The "allocator" knows where the second buffer is. The other reason to go
through it is that acquiring it adds locking, so that we don't end up
pointing to corrupted data for StringDType.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]