AlenkaF commented on code in PR #41889:
URL: https://github.com/apache/arrow/pull/41889#discussion_r1629965069
##########
python/pyarrow/tests/test_io.py:
##########
@@ -669,6 +669,57 @@ def test_allocate_buffer_resizable():
assert buf.size == 200
+def test_non_cpu_buffer(pickle_module):
+ cuda = pytest.importorskip("pyarrow.cuda")
+ ctx = cuda.Context(0)
+
+ arr = np.arange(4, dtype=np.int32)
+ cuda_buf = ctx.buffer_from_data(arr)
+
+ arr = pa.Array.from_buffers(pa.int32(), 4, [None, cuda_buf])
+ buf_on_gpu = arr.buffers()[1]
+
+ assert buf_on_gpu.size == cuda_buf.size
+ assert buf_on_gpu.address == cuda_buf.address
+ assert buf_on_gpu.is_cpu == cuda_buf.is_cpu
+
+ repr1 = "<pyarrow.Buffer address="
+ repr2 = "size=16 is_cpu=False is_mutable=True>"
+ assert repr1 in repr(buf_on_gpu)
+ assert repr2 in repr(buf_on_gpu)
+
+ msg = "Implemented only for data on CPU device"
+ with pytest.raises(NotImplementedError, match=msg):
+ buf_on_gpu.hex()
+
+ with pytest.raises(NotImplementedError, match=msg):
+ cuda_buf.hex()
+
+ assert buf_on_gpu.is_mutable
+
+ with pytest.raises(NotImplementedError, match=msg):
+ buf_on_gpu[1]
+
+ with pytest.raises(NotImplementedError, match=msg):
+ cuda_buf[1]
+
+ with pytest.raises(NotImplementedError, match=msg):
+ pickle_module.dumps(buf_on_gpu, protocol=4)
+
+ buf = pa.py_buffer(b'testing')
+ arr = pa.FixedSizeBinaryArray.from_buffers(pa.binary(7), 1, [None, buf])
+ buf_on_gpu = arr.buffers()[1]
+ buf_on_gpu_sliced = buf_on_gpu.slice(2)
+
+ buf = pa.py_buffer(b'sting')
+ arr = pa.FixedSizeBinaryArray.from_buffers(pa.binary(5), 1, [None, buf])
+ buf_on_gpu_expected = arr.buffers()[1]
+
+ assert buf_on_gpu_sliced.equals(buf_on_gpu_expected)
+ assert buf.equals(buf_on_gpu_expected)
+ assert buf_on_gpu_sliced.to_pybytes() == buf_on_gpu_expected.to_pybytes()
Review Comment:
Yes, the case with integer data is wrong:
```python
>>> from pyarrow import cuda
>>> ctx = cuda.Context(0)
>>> import numpy as np
>>> import pyarrow as pa
>>> arr = np.arange(4, dtype=np.int32)
>>> cuda_buf = ctx.buffer_from_data(arr)
>>> arr = pa.Array.from_buffers(pa.int32(), 4, [None, cuda_buf])
>>> buf_on_gpu = arr.buffers()[1]
>>> buf_on_gpu.to_pybytes()
b'\xe4\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
>>> cuda_buf.to_pybytes()
b'\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00'
```
and I think the binary case would be wrong as well - I just noticed I am starting from a
`pyarrow.Buffer` in this case, not a CUDA Buffer! I need to correct that.
I will also need to look into why the `to_pybytes` result is wrong, and whether it is wrong only for
non-CPU data. I would guess it has worked correctly until now for CPU data?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]