jorisvandenbossche commented on code in PR #41889:
URL: https://github.com/apache/arrow/pull/41889#discussion_r1630758114


##########
python/pyarrow/tests/test_io.py:
##########
@@ -669,6 +669,57 @@ def test_allocate_buffer_resizable():
     assert buf.size == 200
 
 
+def test_non_cpu_buffer(pickle_module):
+    cuda = pytest.importorskip("pyarrow.cuda")
+    ctx = cuda.Context(0)
+
+    arr = np.arange(4, dtype=np.int32)
+    cuda_buf = ctx.buffer_from_data(arr)
+
+    arr = pa.Array.from_buffers(pa.int32(), 4, [None, cuda_buf])
+    buf_on_gpu = arr.buffers()[1]
+
+    assert buf_on_gpu.size == cuda_buf.size
+    assert buf_on_gpu.address == cuda_buf.address
+    assert buf_on_gpu.is_cpu == cuda_buf.is_cpu
+
+    repr1 = "<pyarrow.Buffer address="
+    repr2 = "size=16 is_cpu=False is_mutable=True>"
+    assert repr1 in repr(buf_on_gpu)
+    assert repr2 in repr(buf_on_gpu)
+
+    msg = "Implemented only for data on CPU device"
+    with pytest.raises(NotImplementedError, match=msg):
+        buf_on_gpu.hex()
+
+    with pytest.raises(NotImplementedError, match=msg):
+        cuda_buf.hex()
+
+    assert buf_on_gpu.is_mutable
+
+    with pytest.raises(NotImplementedError, match=msg):
+        buf_on_gpu[1]
+
+    with pytest.raises(NotImplementedError, match=msg):
+        cuda_buf[1]
+
+    with pytest.raises(NotImplementedError, match=msg):
+        pickle_module.dumps(buf_on_gpu, protocol=4)
+
+    buf = pa.py_buffer(b'testing')
+    arr = pa.FixedSizeBinaryArray.from_buffers(pa.binary(7), 1, [None, buf])
+    buf_on_gpu = arr.buffers()[1]
+    buf_on_gpu_sliced = buf_on_gpu.slice(2)
+
+    buf = pa.py_buffer(b'sting')
+    arr = pa.FixedSizeBinaryArray.from_buffers(pa.binary(5), 1, [None, buf])
+    buf_on_gpu_expected = arr.buffers()[1]
+
+    assert buf_on_gpu_sliced.equals(buf_on_gpu_expected)
+    assert buf.equals(buf_on_gpu_expected)
+    assert buf_on_gpu_sliced.to_pybytes() == buf_on_gpu_expected.to_pybytes()

Review Comment:
   If you look at the implementation of `to_pybytes`, this is expected:
   
   
https://github.com/apache/arrow/blob/01d2fa0d461869a07b2ffeee517beb8116bd0ce2/python/pyarrow/io.pxi#L1451-L1457
   
   This gets the pointer to the buffer (`data()`) and passes that to a Python 
function. Python will simply assume this is a pointer to some memory on 
the CPU, so if it gets a pointer to GPU memory and interprets it as CPU memory, 
it is just looking at some random memory, giving garbage.
   
   So again, as with the other methods, we can raise an error in case of non-CPU data.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to