[ 
https://issues.apache.org/jira/browse/ARROW-2142?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16389626#comment-16389626
 ] 

ASF GitHub Bot commented on ARROW-2142:
---------------------------------------

pitrou commented on a change in pull request #1635: ARROW-2142: [Python] Allow 
conversion from Numpy struct array
URL: https://github.com/apache/arrow/pull/1635#discussion_r172858790
 
 

 ##########
 File path: python/pyarrow/tests/test_convert_pandas.py
 ##########
 @@ -1463,6 +1463,124 @@ def test_structarray(self):
         series = pd.Series(arr.to_pandas())
         tm.assert_series_equal(series, expected)
 
+    def test_from_numpy(self):
+        dt = np.dtype([('x', np.int32),
+                       (('y_title', 'y'), np.bool_)])
+        ty = pa.struct([pa.field('x', pa.int32()),
+                        pa.field('y', pa.bool_())])
+
+        data = np.array([], dtype=dt)
+        arr = pa.array(data, type=ty)
+        assert arr.to_pylist() == []
+
+        data = np.array([(42, True), (43, False)], dtype=dt)
+        arr = pa.array(data, type=ty)
+        assert arr.to_pylist() == [{'x': 42, 'y': True},
+                                   {'x': 43, 'y': False}]
+
+        # With mask
+        arr = pa.array(data, mask=np.bool_([False, True]), type=ty)
+        assert arr.to_pylist() == [{'x': 42, 'y': True}, None]
+
+        # Trivial struct type
+        dt = np.dtype([])
+        ty = pa.struct([])
+
+        data = np.array([], dtype=dt)
+        arr = pa.array(data, type=ty)
+        assert arr.to_pylist() == []
+
+        data = np.array([(), ()], dtype=dt)
+        arr = pa.array(data, type=ty)
+        assert arr.to_pylist() == [{}, {}]
+
+    def test_from_numpy_nested(self):
+        dt = np.dtype([('x', np.dtype([('xx', np.int8),
+                                       ('yy', np.bool_)])),
+                       ('y', np.int16)])
+        ty = pa.struct([pa.field('x', pa.struct([pa.field('xx', pa.int8()),
+                                                 pa.field('yy', pa.bool_())])),
+                        pa.field('y', pa.int16())])
+
+        data = np.array([], dtype=dt)
+        arr = pa.array(data, type=ty)
+        assert arr.to_pylist() == []
+
+        data = np.array([((1, True), 2), ((3, False), 4)], dtype=dt)
+        arr = pa.array(data, type=ty)
+        assert arr.to_pylist() == [{'x': {'xx': 1, 'yy': True}, 'y': 2},
+                                   {'x': {'xx': 3, 'yy': False}, 'y': 4}]
+
+    @pytest.mark.large_memory
+    def test_from_numpy_large(self):
+        # Exercise rechunking + nulls
+        target_size = 3 * 1024**3  # 4GB
+        dt = np.dtype([('x', np.float64), ('y', 'object')])
+        bs = 65536 - dt.itemsize
+        block = b'.' * bs
+        n = target_size // (bs + dt.itemsize)
+        data = np.zeros(n, dtype=dt)
+        data['x'] = np.random.random_sample(n)
+        data['y'] = block
+        # Add implicit nulls
+        data['x'][data['x'] < 0.2] = np.nan
+
+        ty = pa.struct([pa.field('x', pa.float64()),
+                        pa.field('y', pa.binary(bs))])
+        arr = pa.array(data, type=ty, from_pandas=True)
+        assert arr.num_chunks == 2
+
+        def iter_chunked_array(arr):
+            for chunk in arr.iterchunks():
+                for item in chunk:
+                    yield item
+
+        def check(arr, data, mask=None):
 
 Review comment:
   Not sure whether there's a more compact form of writing this function...

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> [Python] Conversion from Numpy struct array unimplemented
> ---------------------------------------------------------
>
>                 Key: ARROW-2142
>                 URL: https://issues.apache.org/jira/browse/ARROW-2142
>             Project: Apache Arrow
>          Issue Type: Improvement
>          Components: Python
>    Affects Versions: 0.8.0
>            Reporter: Antoine Pitrou
>            Assignee: Antoine Pitrou
>            Priority: Major
>              Labels: pull-request-available
>
> {code:python}
> >>> arr = np.array([(1.5,)], dtype=np.dtype([('x', np.float32)]))
> >>> arr
> array([(1.5,)], dtype=[('x', '<f4')])
> >>> arr[0]
> (1.5,)
> >>> arr['x']
> array([1.5], dtype=float32)
> >>> arr['x'][0]
> 1.5
> >>> pa.array(arr, type=pa.struct([pa.field('x', pa.float32())]))
> Traceback (most recent call last):
>   File "<ipython-input-18-27a52820b7d8>", line 1, in <module>
>     pa.array(arr, type=pa.struct([pa.field('x', pa.float32())]))
>   File "array.pxi", line 177, in pyarrow.lib.array
>   File "error.pxi", line 77, in pyarrow.lib.check_status
>   File "error.pxi", line 85, in pyarrow.lib.check_status
> ArrowNotImplementedError: 
> /home/antoine/arrow/cpp/src/arrow/python/numpy_to_arrow.cc:1585 code: 
> converter.Convert()
> NumPyConverter doesn't implement <struct<x: float>> conversion.
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to