This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 11921873 fix: Ensure ArrowDeviceArray implementation for AppleMetal 
passes tests on newer MacOS (#527)
11921873 is described below

commit 119218739cd80c54c7063d2fa612a9dee3cc2c60
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Jun 19 13:33:08 2024 +0000

    fix: Ensure ArrowDeviceArray implementation for AppleMetal passes tests on 
newer MacOS (#527)
    
    This PR fixes the Metal build (which was broken after a previous
    change), adds it to CI (thanks to the new GitHub M1 runners!) and fixes
    a failure I see locally (but not on CI) whereby the Metal runtime on
    some (newer? different?) MacOS environments can wrap arbitrary bytes as
    an `MTL::Buffer()`.
    
    If true, this is awesome! But it does mean that in some cases we can
    "move" CPU arrays to the GPU without a copy and in some cases we have to
    copy into page-aligned memory (and that this can only be tested at
    runtime).
---
 .github/workflows/build-and-test-device.yaml | 31 +++---------------
 src/nanoarrow/nanoarrow_device.c             |  2 +-
 src/nanoarrow/nanoarrow_device_metal.cc      | 15 ++++-----
 src/nanoarrow/nanoarrow_device_metal.h       |  8 ++---
 src/nanoarrow/nanoarrow_device_metal_test.cc | 48 +++++++++++++++++++---------
 5 files changed, 49 insertions(+), 55 deletions(-)

diff --git a/.github/workflows/build-and-test-device.yaml 
b/.github/workflows/build-and-test-device.yaml
index d1247c30..8362bfc7 100644
--- a/.github/workflows/build-and-test-device.yaml
+++ b/.github/workflows/build-and-test-device.yaml
@@ -35,7 +35,7 @@ permissions:
 jobs:
   test-c-device:
 
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.config.runner }}
 
     name: ${{ matrix.config.label }}
 
@@ -43,23 +43,15 @@ jobs:
       fail-fast: false
       matrix:
         config:
-          - {label: default-build}
-          - {label: namespaced-build, cmake_args: 
"-DNANOARROW_NAMESPACE=SomeUserNamespace"}
-          - {label: bundled-build, cmake_args: "-DNANOARROW_BUNDLE=ON"}
+          - {runner: ubuntu-latest, label: default-build}
+          - {runner: ubuntu-latest, label: namespaced-build, cmake_args: 
"-DNANOARROW_NAMESPACE=SomeUserNamespace"}
+          - {runner: ubuntu-latest, label: bundled-build, cmake_args: 
"-DNANOARROW_DEVICE_BUNDLE=ON"}
+          - {runner: macOS-latest, label: with-metal, cmake_args: 
"-DNANOARROW_DEVICE_WITH_METAL=ON"}
 
-    env:
-      SUBDIR: '${{ github.workspace }}'
-      NANOARROW_ARROW_TESTING_DIR: '${{ github.workspace }}/arrow-testing'
 
     steps:
       - uses: actions/checkout@v4
 
-      - name: Checkout arrow-testing
-        uses: actions/checkout@v4
-        with:
-          repository: apache/arrow-testing
-          path: arrow-testing
-
       - name: Install memcheck dependencies
         if: matrix.config.label == 'default-build'
         run: |
@@ -82,9 +74,6 @@ jobs:
       - name: Build
         run: |
           ARROW_PATH="$(pwd)/arrow"
-          cd $SUBDIR
-          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
-          sudo ldconfig
           mkdir build
           cd build
           cmake .. -DCMAKE_BUILD_TYPE=Debug -DNANOARROW_DEVICE=ON \
@@ -96,8 +85,6 @@ jobs:
       - name: Check for non-namespaced symbols in namespaced build
         if: matrix.config.label == 'namespaced-build'
         run: |
-          cd $SUBDIR
-
           # Dump all symbols
           nm --extern-only build/libnanoarrow_device.a
 
@@ -113,20 +100,12 @@ jobs:
 
       - name: Run tests
         run: |
-          cd $SUBDIR
-
-          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
-          sudo ldconfig
           cd build
           ctest -T test --output-on-failure .
 
       - name: Run tests with valgrind
         if: matrix.config.label == 'default-build' || matrix.config.label == 
'default-noatomics'
         run: |
-          cd $SUBDIR
-
-          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
-          sudo ldconfig
           cd build
           ctest -T memcheck .
 
diff --git a/src/nanoarrow/nanoarrow_device.c b/src/nanoarrow/nanoarrow_device.c
index a2ce4665..e5a3e624 100644
--- a/src/nanoarrow/nanoarrow_device.c
+++ b/src/nanoarrow/nanoarrow_device.c
@@ -493,5 +493,5 @@ ArrowErrorCode ArrowDeviceArrayMoveToDevice(struct 
ArrowDeviceArray* src,
     NANOARROW_RETURN_NOT_OK(device_dst->array_move(device_src, src, 
device_dst, dst));
   }
 
-  return ENOTSUP;
+  return NANOARROW_OK;
 }
diff --git a/src/nanoarrow/nanoarrow_device_metal.cc 
b/src/nanoarrow/nanoarrow_device_metal.cc
index b0d31f1b..1a2cb29d 100644
--- a/src/nanoarrow/nanoarrow_device_metal.cc
+++ b/src/nanoarrow/nanoarrow_device_metal.cc
@@ -144,7 +144,9 @@ struct ArrowDeviceMetalArrayPrivate {
 static void ArrowDeviceMetalArrayRelease(struct ArrowArray* array) {
   struct ArrowDeviceMetalArrayPrivate* private_data =
       (struct ArrowDeviceMetalArrayPrivate*)array->private_data;
-  private_data->event->release();
+  if (private_data->event != nullptr) {
+    private_data->event->release();
+  }
   ArrowArrayRelease(&private_data->parent);
   ArrowFree(private_data);
   array->release = NULL;
@@ -224,15 +226,10 @@ static ArrowErrorCode ArrowDeviceMetalBufferMove(struct 
ArrowDevice* device_src,
       mtl_buffer->release();
       ArrowBufferMove(src, dst);
       return NANOARROW_OK;
+    } else {
+      // Otherwise, return ENOTSUP to signal that a move is not possible
+      return ENOTSUP;
     }
-
-    // Otherwise, initialize a new buffer and copy
-    struct ArrowBuffer tmp;
-    ArrowDeviceMetalInitBuffer(&tmp);
-    NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(&tmp, src->data, 
src->size_bytes));
-    ArrowBufferMove(&tmp, dst);
-    ArrowBufferReset(src);
-    return NANOARROW_OK;
   } else if (device_src->device_type == ARROW_DEVICE_METAL &&
              device_dst->device_type == ARROW_DEVICE_METAL) {
     // Metal -> Metal is always just a move
diff --git a/src/nanoarrow/nanoarrow_device_metal.h 
b/src/nanoarrow/nanoarrow_device_metal.h
index 52cda74c..cc29fa0f 100644
--- a/src/nanoarrow/nanoarrow_device_metal.h
+++ b/src/nanoarrow/nanoarrow_device_metal.h
@@ -26,10 +26,10 @@
   NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalDefaultDevice)
 #define ArrowDeviceMetalInitDefaultDevice \
   NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalInitDefaultDevice)
-#define ArrowDeviceMetalInitCpuBuffer \
-  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalInitCpuBuffer)
-#define ArrowDeviceMetalInitCpuArrayBuffers \
-  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalInitCpuArrayBuffers)
+#define ArrowDeviceMetalInitBuffer \
+  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalInitBuffer)
+#define ArrowDeviceMetalAlignArrayBuffers \
+  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalAlignArrayBuffers)
 
 #endif
 
diff --git a/src/nanoarrow/nanoarrow_device_metal_test.cc 
b/src/nanoarrow/nanoarrow_device_metal_test.cc
index 0f4cff1b..10a9e83f 100644
--- a/src/nanoarrow/nanoarrow_device_metal_test.cc
+++ b/src/nanoarrow/nanoarrow_device_metal_test.cc
@@ -99,17 +99,26 @@ TEST(NanoarrowDeviceMetal, DeviceGpuBufferMove) {
   EXPECT_EQ(buffer.data, nullptr);
   ArrowBufferReset(&buffer2);
 
-  // CPU -> GPU without alignment should trigger a copy and release the input
+  // CPU -> GPU without alignment may require a copy
   ArrowBufferInit(&buffer);
   ASSERT_EQ(ArrowBufferAppend(&buffer, data, sizeof(data)), NANOARROW_OK);
   old_ptr = buffer.data;
-  ASSERT_EQ(ArrowDeviceBufferMove(cpu, &buffer, gpu, &buffer2), NANOARROW_OK);
-  EXPECT_EQ(buffer2.size_bytes, 5);
-  EXPECT_NE(buffer2.data, old_ptr);
-  EXPECT_EQ(memcmp(buffer2.data, data, sizeof(data)), 0);
-  EXPECT_EQ(buffer.data, nullptr);
 
-  ArrowBufferReset(&buffer2);
+  int code = ArrowDeviceBufferMove(cpu, &buffer, gpu, &buffer2);
+  if (code == NANOARROW_OK) {
+    // If the move was reported as a success, ensure it happened
+    ASSERT_EQ(buffer2.data, old_ptr);
+    EXPECT_EQ(buffer.data, nullptr);
+    ASSERT_EQ(buffer2.size_bytes, 5);
+    EXPECT_EQ(memcmp(buffer2.data, data, sizeof(data)), 0);
+    ArrowBufferReset(&buffer2);
+  } else {
+    // Otherwise, ensure the old buffer was left intact
+    ASSERT_EQ(buffer.data, old_ptr);
+    EXPECT_EQ(buffer2.data, nullptr);
+    ASSERT_EQ(buffer.size_bytes, 5);
+    EXPECT_EQ(memcmp(buffer.data, data, sizeof(data)), 0);
+  }
 }
 
 TEST(NanoarrowDeviceMetal, DeviceGpuBufferCopy) {
@@ -236,23 +245,32 @@ TEST_P(StringTypeParameterizedTestFixture, 
ArrowDeviceMetalArrayViewString) {
 
   EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7);
 
-  // Copy required to Metal
+  // In some MacOS environments, ArrowDeviceArrayMoveToDevice() with buffers 
allocated
+  // using ArrowMalloc() will work (i.e., apparently MTL::Buffer can wrap
+  // arbitrary bytes, but only sometimes)
   struct ArrowDeviceArray device_array2;
   device_array2.array.release = nullptr;
-  ASSERT_EQ(ArrowDeviceArrayMoveToDevice(&device_array, metal, 
&device_array2), ENOTSUP);
-  ASSERT_EQ(ArrowDeviceArrayViewCopy(&device_array_view, metal, 
&device_array2),
-            NANOARROW_OK);
-  ArrowArrayRelease(&device_array.array);
 
-  ASSERT_NE(device_array2.array.release, nullptr);
-  ASSERT_EQ(device_array2.device_id, metal->device_id);
+  int code = ArrowDeviceArrayMoveToDevice(&device_array, metal, 
&device_array2);
+  if (code == NANOARROW_OK) {
+    // If the move was successful, ensure it actually happened
+    ASSERT_EQ(device_array.array.release, nullptr);
+    ASSERT_NE(device_array2.array.release, nullptr);
+  } else {
+    // If the move was not successful, ensure we can copy to a metal device 
array
+    ASSERT_EQ(code, ENOTSUP);
+    ASSERT_EQ(ArrowDeviceArrayViewCopy(&device_array_view, metal, 
&device_array2),
+              NANOARROW_OK);
+  }
+
+  // Either way, ensure that the device array is reporting correct values
   ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array2, 
nullptr),
             NANOARROW_OK);
   EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7);
   EXPECT_EQ(memcmp(device_array_view.array_view.buffer_views[2].data.data, 
"abcdefg", 7),
             0);
 
-  // Copy shouldn't be required to the CPU
+  // Copy shouldn't be required back to the CPU
   ASSERT_EQ(ArrowDeviceArrayMoveToDevice(&device_array2, cpu, &device_array),
             NANOARROW_OK);
   ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array, 
nullptr),

Reply via email to