[
https://issues.apache.org/jira/browse/ARROW-2142?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16382850#comment-16382850
]
ASF GitHub Bot commented on ARROW-2142:
---------------------------------------
wesm commented on a change in pull request #1635: ARROW-2142: [Python] Allow
conversion from Numpy struct array
URL: https://github.com/apache/arrow/pull/1635#discussion_r171722648
##########
File path: cpp/src/arrow/array.cc
##########
@@ -772,6 +773,105 @@ std::shared_ptr<Array> MakeArray(const
std::shared_ptr<ArrayData>& data) {
return out;
}
+// ----------------------------------------------------------------------
+// Misc APIs
+
+namespace internal {
+
+std::vector<ArrayVector> RechunkArraysConsistently(
+ const std::vector<ArrayVector>& groups) {
+ if (groups.size() <= 1) {
+ return groups;
+ }
+ // Adjacent slices defining the desired rechunking
+ std::vector<std::pair<int64_t, int64_t>> slices;
+ // Total number of elements common to all array groups
+ int64_t total_length = -1;
+
+ {
+ // Compute a vector of slices such that each array spans
+ // one or more *entire* slices only
+ // e.g. if group #1 has bounds {0, 2, 4, 5, 10}
+ // and group #2 has bounds {0, 5, 7, 10}
+ // then the computed slices are
+ // {(0, 2), (2, 4), (4, 5), (5, 7), (7, 10)}
+ std::set<int64_t> bounds;
+ for (auto& group : groups) {
+ int64_t cur = 0;
+ bounds.insert(cur);
+ for (auto& array : group) {
+ cur += array->length();
+ bounds.insert(cur);
+ }
+ if (total_length == -1) {
+ total_length = cur;
+ } else {
+ // XXX Should we return an error code instead?
+ DCHECK_EQ(total_length, cur)
+ << "Array groups should have the same number of elements";
+ }
+ }
+ if (total_length == 0) {
+ return groups;
+ }
+ auto it = bounds.cbegin();
+ auto end = bounds.cend();
+ int64_t start = *it;
+ while (++it != end) {
+ int64_t stop = *it;
+ DCHECK_GE(stop, start);
+ slices.emplace_back(start, stop);
+ start = stop;
+ }
+ DCHECK_EQ(slices.front().first, 0);
+ DCHECK_EQ(slices.back().second, total_length);
+ }
+
+ // Rechunk each array group along the computed slices
+ std::vector<ArrayVector> rechunked_groups;
+ for (auto& group : groups) {
+ ArrayVector rechunked;
+ int64_t cur = 0;
+ auto slices_it = slices.cbegin();
+ auto slices_end = slices.cend();
+
+ for (auto& array : group) {
+ int64_t array_start = cur, array_stop = cur + array->length();
Review comment:
It's better for readability to put each assignment on its own line.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> [Python] Conversion from Numpy struct array unimplemented
> ---------------------------------------------------------
>
> Key: ARROW-2142
> URL: https://issues.apache.org/jira/browse/ARROW-2142
> Project: Apache Arrow
> Issue Type: Improvement
> Components: Python
> Affects Versions: 0.8.0
> Reporter: Antoine Pitrou
> Assignee: Antoine Pitrou
> Priority: Major
> Labels: pull-request-available
>
> {code:python}
> >>> arr = np.array([(1.5,)], dtype=np.dtype([('x', np.float32)]))
> >>> arr
> array([(1.5,)], dtype=[('x', '<f4')])
> >>> arr[0]
> (1.5,)
> >>> arr['x']
> array([1.5], dtype=float32)
> >>> arr['x'][0]
> 1.5
> >>> pa.array(arr, type=pa.struct([pa.field('x', pa.float32())]))
> Traceback (most recent call last):
> File "<ipython-input-18-27a52820b7d8>", line 1, in <module>
> pa.array(arr, type=pa.struct([pa.field('x', pa.float32())]))
> File "array.pxi", line 177, in pyarrow.lib.array
> File "error.pxi", line 77, in pyarrow.lib.check_status
> File "error.pxi", line 85, in pyarrow.lib.check_status
> ArrowNotImplementedError:
> /home/antoine/arrow/cpp/src/arrow/python/numpy_to_arrow.cc:1585 code:
> converter.Convert()
> NumPyConverter doesn't implement <struct<x: float>> conversion.
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)