[
https://issues.apache.org/jira/browse/ARROW-2142?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16371723#comment-16371723
]
ASF GitHub Bot commented on ARROW-2142:
---------------------------------------
pitrou commented on a change in pull request #1635: ARROW-2142: [Python] Allow
conversion from Numpy struct array
URL: https://github.com/apache/arrow/pull/1635#discussion_r169718158
##########
File path: cpp/src/arrow/python/numpy_to_arrow.cc
##########
@@ -1590,6 +1592,85 @@ Status NumPyConverter::Visit(const StringType& type) {
return PushArray(result->data());
}
+Status NumPyConverter::Visit(const StructType& type) {
+ std::vector<NumPyConverter> sub_converters;
+ std::vector<OwnedRefNoGIL> sub_arrays;
+
+ {
+ PyAcquireGIL gil_lock;
+
+ // Create converters for each struct type field
+ if (dtype_->fields == NULL || !PyDict_Check(dtype_->fields)) {
+ return Status::TypeError("Expected struct array");
+ }
+
+ for (auto field : type.children()) {
+ PyObject* tup = PyDict_GetItemString(dtype_->fields,
field->name().c_str());
+ if (tup == NULL) {
+ std::stringstream ss;
+ ss << "Missing field '" << field->name() << "' in struct array";
+ return Status::TypeError(ss.str());
+ }
+ PyArray_Descr* sub_dtype =
reinterpret_cast<PyArray_Descr*>(PyTuple_GET_ITEM(tup, 0));
+ DCHECK(PyArray_DescrCheck(sub_dtype));
+ int offset = static_cast<int>(PyLong_AsLong(PyTuple_GET_ITEM(tup, 1)));
+ RETURN_IF_PYERROR();
+ Py_INCREF(sub_dtype); /* PyArray_GetField() steals ref */
+ PyObject* sub_array = PyArray_GetField(arr_, sub_dtype, offset);
+ RETURN_IF_PYERROR();
+ sub_arrays.emplace_back(sub_array);
+ sub_converters.emplace_back(pool_, sub_array, nullptr /* mask */,
+ field->type(), use_pandas_null_sentinels_);
+ }
+ }
+
+ std::vector<ArrayVector> groups;
+
+ // Compute null bitmap and store it as a Null Array to include it
+ // in the rechunking below
+ {
+ int64_t null_count = 0;
+ if (mask_ != nullptr) {
+ RETURN_NOT_OK(InitNullBitmap());
+ null_count = MaskToBitmap(mask_, length_, null_bitmap_data_);
+ }
+ auto null_data = ArrayData::Make(std::make_shared<NullType>(), length_,
+ {null_bitmap_}, null_count, 0);
Review comment:
Note this is a bit of a hack, since typically null arrays don't have an
underlying buffer at all.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> [Python] Conversion from Numpy struct array unimplemented
> ---------------------------------------------------------
>
> Key: ARROW-2142
> URL: https://issues.apache.org/jira/browse/ARROW-2142
> Project: Apache Arrow
> Issue Type: Improvement
> Components: Python
> Affects Versions: 0.8.0
> Reporter: Antoine Pitrou
> Assignee: Antoine Pitrou
> Priority: Major
> Labels: pull-request-available
>
> {code:python}
> >>> arr = np.array([(1.5,)], dtype=np.dtype([('x', np.float32)]))
> >>> arr
> array([(1.5,)], dtype=[('x', '<f4')])
> >>> arr[0]
> (1.5,)
> >>> arr['x']
> array([1.5], dtype=float32)
> >>> arr['x'][0]
> 1.5
> >>> pa.array(arr, type=pa.struct([pa.field('x', pa.float32())]))
> Traceback (most recent call last):
> File "<ipython-input-18-27a52820b7d8>", line 1, in <module>
> pa.array(arr, type=pa.struct([pa.field('x', pa.float32())]))
> File "array.pxi", line 177, in pyarrow.lib.array
> File "error.pxi", line 77, in pyarrow.lib.check_status
> File "error.pxi", line 85, in pyarrow.lib.check_status
> ArrowNotImplementedError:
> /home/antoine/arrow/cpp/src/arrow/python/numpy_to_arrow.cc:1585 code:
> converter.Convert()
> NumPyConverter doesn't implement <struct<x: float>> conversion.
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)