This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new ecd769c3e9 GH-39858: [C++][Device] Add Copy/View slice functions to a 
CPU pointer (#41477)
ecd769c3e9 is described below

commit ecd769c3e9dade8dcb381fba7c2ac059d46a3a17
Author: Alan Stoate <[email protected]>
AuthorDate: Fri May 24 01:32:02 2024 +1000

    GH-39858: [C++][Device] Add Copy/View slice functions to a CPU pointer 
(#41477)
    
    ### Rationale for this change
    Currently ```MemoryManager``` objects define functionality to Copy or View 
entire buffers. Occasionally there is the need to only copy a single value or 
slice from a buffer to a piece of CPU memory (see 
https://github.com/apache/arrow/pull/39770#discussion_r1470135438). It's 
overkill to do a bunch of whole Buffer operations and manually slicing just to 
copy 4 or 8 bytes.
    
    ### What changes are included in this PR?
    Add the ```MemoryManager::CopyBufferSliceToCPU``` function, which initially 
attempts to use memcpy for the specified slice. If this is not possible, it 
defaults to copying the entire buffer and then viewing/copying the slice.
    
    Update ```ArrayImporter::ImportStringValuesBuffer``` to use this function.
    
    ### Are these changes tested?
    
    ```ArrayImporter::ImportStringValuesBuffer```  is tested as a part of  
```arrow-c-bridge-test```
    * GitHub Issue: #39858
    
    Lead-authored-by: Alan Stoate <[email protected]>
    Co-authored-by: Mac Lilly <[email protected]>
    Co-authored-by: Felipe Oliveira Carvalho <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/c/bridge.cc | 27 ++++++++++-----------------
 cpp/src/arrow/device.cc   | 26 ++++++++++++++++++++++++++
 cpp/src/arrow/device.h    |  4 ++++
 3 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index 8c5e3637b6..3e2e04ba0b 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -1871,24 +1871,17 @@ struct ArrayImporter {
   template <typename OffsetType>
   Status ImportStringValuesBuffer(int32_t offsets_buffer_id, int32_t buffer_id,
                                   int64_t byte_width = 1) {
-    if (device_type_ == DeviceAllocationType::kCPU) {
-      auto offsets = data_->GetValues<OffsetType>(offsets_buffer_id);
+    int64_t buffer_size = 0;
+    if (c_struct_->length > 0) {
+      int64_t last_offset_value_offset =
+          (c_struct_->length + c_struct_->offset) * sizeof(OffsetType);
+      OffsetType last_offset_value;
+      RETURN_NOT_OK(MemoryManager::CopyBufferSliceToCPU(
+          data_->buffers[offsets_buffer_id], last_offset_value_offset, 
sizeof(OffsetType),
+          reinterpret_cast<uint8_t*>(&last_offset_value)));
       // Compute visible size of buffer
-      int64_t buffer_size =
-          (c_struct_->length > 0) ? byte_width * offsets[c_struct_->length] : 
0;
-      return ImportBuffer(buffer_id, buffer_size);
-    }
-
-    // we only need the value of the last offset so let's just copy that
-    // one value from device to host.
-    auto single_value_buf =
-        SliceBuffer(data_->buffers[offsets_buffer_id],
-                    c_struct_->length * sizeof(OffsetType), 
sizeof(OffsetType));
-    ARROW_ASSIGN_OR_RAISE(
-        auto cpubuf, Buffer::ViewOrCopy(single_value_buf, 
default_cpu_memory_manager()));
-    auto offsets = cpubuf->data_as<OffsetType>();
-    // Compute visible size of buffer
-    int64_t buffer_size = (c_struct_->length > 0) ? byte_width * offsets[0] : 
0;
+      buffer_size = byte_width * last_offset_value;
+    }
 
     return ImportBuffer(buffer_id, buffer_size);
   }
diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc
index 98b8f7b303..01a2b8df53 100644
--- a/cpp/src/arrow/device.cc
+++ b/cpp/src/arrow/device.cc
@@ -116,6 +116,32 @@ Result<std::shared_ptr<Buffer>> MemoryManager::ViewBuffer(
                                 " on ", to->device()->ToString(), " not 
supported");
 }
 
+Status MemoryManager::CopyBufferSliceToCPU(const std::shared_ptr<Buffer>& buf,
+                                           int64_t offset, int64_t length,
+                                           uint8_t* out_data) {
+  if (ARROW_PREDICT_TRUE(buf->is_cpu())) {
+    memcpy(out_data, buf->data() + offset, static_cast<size_t>(length));
+    return Status::OK();
+  }
+
+  auto& from = buf->memory_manager();
+  auto cpu_mm = default_cpu_memory_manager();
+  // Try a view first
+  auto maybe_buffer_result = from->ViewBufferTo(buf, cpu_mm);
+  if (!COPY_BUFFER_SUCCESS(maybe_buffer_result)) {
+    // View failed, try a copy instead
+    maybe_buffer_result = from->CopyBufferTo(buf, cpu_mm);
+  }
+  ARROW_ASSIGN_OR_RAISE(auto maybe_buffer, std::move(maybe_buffer_result));
+  if (maybe_buffer != nullptr) {
+    memcpy(out_data, maybe_buffer->data() + offset, 
static_cast<size_t>(length));
+    return Status::OK();
+  }
+
+  return Status::NotImplemented("Copying buffer slice from ", 
from->device()->ToString(),
+                                " to CPU not supported");
+}
+
 #undef COPY_BUFFER_RETURN
 #undef COPY_BUFFER_SUCCESS
 
diff --git a/cpp/src/arrow/device.h b/cpp/src/arrow/device.h
index a591167ef9..f5cca0d27d 100644
--- a/cpp/src/arrow/device.h
+++ b/cpp/src/arrow/device.h
@@ -249,6 +249,10 @@ class ARROW_EXPORT MemoryManager : public 
std::enable_shared_from_this<MemoryMan
   static Result<std::shared_ptr<Buffer>> ViewBuffer(
       const std::shared_ptr<Buffer>& source, const 
std::shared_ptr<MemoryManager>& to);
 
+  /// \brief Copy a slice of a buffer into a CPU pointer
+  static Status CopyBufferSliceToCPU(const std::shared_ptr<Buffer>& buf, 
int64_t offset,
+                                     int64_t length, uint8_t* out_data);
+
   /// \brief Create a new SyncEvent.
   ///
   /// This version should construct the appropriate event for the device and

Reply via email to