This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new b35e7ddbef GH-43507: [C++] Use ViewOrCopyTo instead of CopyTo when
pretty printing non-CPU data (#43508)
b35e7ddbef is described below
commit b35e7ddbefe394ab7e5a6b1c3f3bb476cdc800f2
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Fri Aug 2 02:10:00 2024 +0200
GH-43507: [C++] Use ViewOrCopyTo instead of CopyTo when pretty printing
non-CPU data (#43508)
### Rationale for this change
When ensuring the data we are pretty-printing is on the CPU, we can use
`ViewOrCopyTo` instead of `CopyTo`, in case the data can be viewed as CPU data
without a copy.
### Are these changes tested?
Yes (I added a test that uses CUDA host memory, which should be a case
where it can be viewed and doesn't need to be copied, but of course the test is
not actually ensuring we avoid the copy, just that the printing works)
### Are there any user-facing changes?
No
* GitHub Issue: #43507
Authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
cpp/src/arrow/pretty_print.cc | 3 ++-
python/pyarrow/tests/test_cuda.py | 11 +++++++++++
2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc
index 53a6953681..c5905d0c8c 100644
--- a/cpp/src/arrow/pretty_print.cc
+++ b/cpp/src/arrow/pretty_print.cc
@@ -394,7 +394,8 @@ class ArrayPrinter : public PrettyPrinter {
if (array.device_type() != DeviceAllocationType::kCPU) {
// GH-43055: ideally we only copy start/end slices from non-CPU memory
// based on the window size that is being printed
- ARROW_ASSIGN_OR_RAISE(auto array_cpu, array.CopyTo(default_cpu_memory_manager()));
+ ARROW_ASSIGN_OR_RAISE(auto array_cpu,
+ array.ViewOrCopyTo(default_cpu_memory_manager()));
RETURN_NOT_OK(VisitArrayInline(*array_cpu, this));
} else {
RETURN_NOT_OK(VisitArrayInline(array, this));
diff --git a/python/pyarrow/tests/test_cuda.py b/python/pyarrow/tests/test_cuda.py
index 61f784a729..8749ab29d0 100644
--- a/python/pyarrow/tests/test_cuda.py
+++ b/python/pyarrow/tests/test_cuda.py
@@ -973,6 +973,17 @@ def test_print_array():
assert str(carr) == str(arr)
+@pytest.mark.parametrize("size", [10, 100])
+def test_print_array_host(size):
+ buf = cuda.new_host_buffer(size*8)
+ np_arr = np.frombuffer(buf, dtype=np.int64)
+ np_arr[:] = range(size)
+
+ arr = pa.array(range(size), pa.int64())
+ carr = pa.Array.from_buffers(pa.int64(), size, [None, buf])
+ assert str(carr) == str(arr)
+
+
def make_chunked_array(n_elements_per_chunk, n_chunks):
arrs = []
carrs = []