This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 11921873 fix: Ensure ArrowDeviceArray implementation for AppleMetal
passes tests on newer MacOS (#527)
11921873 is described below
commit 119218739cd80c54c7063d2fa612a9dee3cc2c60
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Jun 19 13:33:08 2024 +0000
fix: Ensure ArrowDeviceArray implementation for AppleMetal passes tests on
newer MacOS (#527)
This PR fixes the Metal build (which was broken after a previous
change), adds it to CI (thanks to the new GitHub M1 runners!) and fixes
a failure I see locally (but not on CI) whereby the Metal runtime on
some (newer? different?) MacOS environments can wrap arbitrary bytes as
an `MTL::Buffer()`.
If true, this is awesome! But it does mean that in some cases we can
"move" CPU arrays to the GPU without a copy and in some cases we have to
copy into page-aligned memory (and that this can only be tested at
runtime).
---
.github/workflows/build-and-test-device.yaml | 31 +++---------------
src/nanoarrow/nanoarrow_device.c | 2 +-
src/nanoarrow/nanoarrow_device_metal.cc | 15 ++++-----
src/nanoarrow/nanoarrow_device_metal.h | 8 ++---
src/nanoarrow/nanoarrow_device_metal_test.cc | 48 +++++++++++++++++++---------
5 files changed, 49 insertions(+), 55 deletions(-)
diff --git a/.github/workflows/build-and-test-device.yaml
b/.github/workflows/build-and-test-device.yaml
index d1247c30..8362bfc7 100644
--- a/.github/workflows/build-and-test-device.yaml
+++ b/.github/workflows/build-and-test-device.yaml
@@ -35,7 +35,7 @@ permissions:
jobs:
test-c-device:
- runs-on: ubuntu-latest
+ runs-on: ${{ matrix.config.runner }}
name: ${{ matrix.config.label }}
@@ -43,23 +43,15 @@ jobs:
fail-fast: false
matrix:
config:
- - {label: default-build}
- - {label: namespaced-build, cmake_args:
"-DNANOARROW_NAMESPACE=SomeUserNamespace"}
- - {label: bundled-build, cmake_args: "-DNANOARROW_BUNDLE=ON"}
+ - {runner: ubuntu-latest, label: default-build}
+ - {runner: ubuntu-latest, label: namespaced-build, cmake_args:
"-DNANOARROW_NAMESPACE=SomeUserNamespace"}
+ - {runner: ubuntu-latest, label: bundled-build, cmake_args:
"-DNANOARROW_DEVICE_BUNDLE=ON"}
+ - {runner: macOS-latest, label: with-metal, cmake_args:
"-DNANOARROW_DEVICE_WITH_METAL=ON"}
- env:
- SUBDIR: '${{ github.workspace }}'
- NANOARROW_ARROW_TESTING_DIR: '${{ github.workspace }}/arrow-testing'
steps:
- uses: actions/checkout@v4
- - name: Checkout arrow-testing
- uses: actions/checkout@v4
- with:
- repository: apache/arrow-testing
- path: arrow-testing
-
- name: Install memcheck dependencies
if: matrix.config.label == 'default-build'
run: |
@@ -82,9 +74,6 @@ jobs:
- name: Build
run: |
ARROW_PATH="$(pwd)/arrow"
- cd $SUBDIR
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
- sudo ldconfig
mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug -DNANOARROW_DEVICE=ON \
@@ -96,8 +85,6 @@ jobs:
- name: Check for non-namespaced symbols in namespaced build
if: matrix.config.label == 'namespaced-build'
run: |
- cd $SUBDIR
-
# Dump all symbols
nm --extern-only build/libnanoarrow_device.a
@@ -113,20 +100,12 @@ jobs:
- name: Run tests
run: |
- cd $SUBDIR
-
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
- sudo ldconfig
cd build
ctest -T test --output-on-failure .
- name: Run tests with valgrind
if: matrix.config.label == 'default-build' || matrix.config.label ==
'default-noatomics'
run: |
- cd $SUBDIR
-
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
- sudo ldconfig
cd build
ctest -T memcheck .
diff --git a/src/nanoarrow/nanoarrow_device.c b/src/nanoarrow/nanoarrow_device.c
index a2ce4665..e5a3e624 100644
--- a/src/nanoarrow/nanoarrow_device.c
+++ b/src/nanoarrow/nanoarrow_device.c
@@ -493,5 +493,5 @@ ArrowErrorCode ArrowDeviceArrayMoveToDevice(struct
ArrowDeviceArray* src,
NANOARROW_RETURN_NOT_OK(device_dst->array_move(device_src, src,
device_dst, dst));
}
- return ENOTSUP;
+ return NANOARROW_OK;
}
diff --git a/src/nanoarrow/nanoarrow_device_metal.cc
b/src/nanoarrow/nanoarrow_device_metal.cc
index b0d31f1b..1a2cb29d 100644
--- a/src/nanoarrow/nanoarrow_device_metal.cc
+++ b/src/nanoarrow/nanoarrow_device_metal.cc
@@ -144,7 +144,9 @@ struct ArrowDeviceMetalArrayPrivate {
static void ArrowDeviceMetalArrayRelease(struct ArrowArray* array) {
struct ArrowDeviceMetalArrayPrivate* private_data =
(struct ArrowDeviceMetalArrayPrivate*)array->private_data;
- private_data->event->release();
+ if (private_data->event != nullptr) {
+ private_data->event->release();
+ }
ArrowArrayRelease(&private_data->parent);
ArrowFree(private_data);
array->release = NULL;
@@ -224,15 +226,10 @@ static ArrowErrorCode ArrowDeviceMetalBufferMove(struct
ArrowDevice* device_src,
mtl_buffer->release();
ArrowBufferMove(src, dst);
return NANOARROW_OK;
+ } else {
+ // Otherwise, return ENOTSUP to signal that a move is not possible
+ return ENOTSUP;
}
-
- // Otherwise, initialize a new buffer and copy
- struct ArrowBuffer tmp;
- ArrowDeviceMetalInitBuffer(&tmp);
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(&tmp, src->data,
src->size_bytes));
- ArrowBufferMove(&tmp, dst);
- ArrowBufferReset(src);
- return NANOARROW_OK;
} else if (device_src->device_type == ARROW_DEVICE_METAL &&
device_dst->device_type == ARROW_DEVICE_METAL) {
// Metal -> Metal is always just a move
diff --git a/src/nanoarrow/nanoarrow_device_metal.h
b/src/nanoarrow/nanoarrow_device_metal.h
index 52cda74c..cc29fa0f 100644
--- a/src/nanoarrow/nanoarrow_device_metal.h
+++ b/src/nanoarrow/nanoarrow_device_metal.h
@@ -26,10 +26,10 @@
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalDefaultDevice)
#define ArrowDeviceMetalInitDefaultDevice \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalInitDefaultDevice)
-#define ArrowDeviceMetalInitCpuBuffer \
- NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalInitCpuBuffer)
-#define ArrowDeviceMetalInitCpuArrayBuffers \
- NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalInitCpuArrayBuffers)
+#define ArrowDeviceMetalInitBuffer \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalInitBuffer)
+#define ArrowDeviceMetalAlignArrayBuffers \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalAlignArrayBuffers)
#endif
diff --git a/src/nanoarrow/nanoarrow_device_metal_test.cc
b/src/nanoarrow/nanoarrow_device_metal_test.cc
index 0f4cff1b..10a9e83f 100644
--- a/src/nanoarrow/nanoarrow_device_metal_test.cc
+++ b/src/nanoarrow/nanoarrow_device_metal_test.cc
@@ -99,17 +99,26 @@ TEST(NanoarrowDeviceMetal, DeviceGpuBufferMove) {
EXPECT_EQ(buffer.data, nullptr);
ArrowBufferReset(&buffer2);
- // CPU -> GPU without alignment should trigger a copy and release the input
+ // CPU -> GPU without alignment may require a copy
ArrowBufferInit(&buffer);
ASSERT_EQ(ArrowBufferAppend(&buffer, data, sizeof(data)), NANOARROW_OK);
old_ptr = buffer.data;
- ASSERT_EQ(ArrowDeviceBufferMove(cpu, &buffer, gpu, &buffer2), NANOARROW_OK);
- EXPECT_EQ(buffer2.size_bytes, 5);
- EXPECT_NE(buffer2.data, old_ptr);
- EXPECT_EQ(memcmp(buffer2.data, data, sizeof(data)), 0);
- EXPECT_EQ(buffer.data, nullptr);
- ArrowBufferReset(&buffer2);
+ int code = ArrowDeviceBufferMove(cpu, &buffer, gpu, &buffer2);
+ if (code == NANOARROW_OK) {
+ // If the move was reported as a success, ensure it happened
+ ASSERT_EQ(buffer2.data, old_ptr);
+ EXPECT_EQ(buffer.data, nullptr);
+ ASSERT_EQ(buffer2.size_bytes, 5);
+ EXPECT_EQ(memcmp(buffer2.data, data, sizeof(data)), 0);
+ ArrowBufferReset(&buffer2);
+ } else {
+ // Otherwise, ensure the old buffer was left intact
+ ASSERT_EQ(buffer.data, old_ptr);
+ EXPECT_EQ(buffer2.data, nullptr);
+ ASSERT_EQ(buffer.size_bytes, 5);
+ EXPECT_EQ(memcmp(buffer.data, data, sizeof(data)), 0);
+ }
}
TEST(NanoarrowDeviceMetal, DeviceGpuBufferCopy) {
@@ -236,23 +245,32 @@ TEST_P(StringTypeParameterizedTestFixture,
ArrowDeviceMetalArrayViewString) {
EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7);
- // Copy required to Metal
+ // In some MacOS environments, ArrowDeviceArrayMoveToDevice() with buffers
allocated
+ // using ArrowMalloc() will work (i.e., apparently MTL::Buffer can wrap
+ // arbitrary bytes, but only sometimes)
struct ArrowDeviceArray device_array2;
device_array2.array.release = nullptr;
- ASSERT_EQ(ArrowDeviceArrayMoveToDevice(&device_array, metal,
&device_array2), ENOTSUP);
- ASSERT_EQ(ArrowDeviceArrayViewCopy(&device_array_view, metal,
&device_array2),
- NANOARROW_OK);
- ArrowArrayRelease(&device_array.array);
- ASSERT_NE(device_array2.array.release, nullptr);
- ASSERT_EQ(device_array2.device_id, metal->device_id);
+ int code = ArrowDeviceArrayMoveToDevice(&device_array, metal,
&device_array2);
+ if (code == NANOARROW_OK) {
+ // If the move was successful, ensure it actually happened
+ ASSERT_EQ(device_array.array.release, nullptr);
+ ASSERT_NE(device_array2.array.release, nullptr);
+ } else {
+ // If the move was not successful, ensure we can copy to a metal device
array
+ ASSERT_EQ(code, ENOTSUP);
+ ASSERT_EQ(ArrowDeviceArrayViewCopy(&device_array_view, metal,
&device_array2),
+ NANOARROW_OK);
+ }
+
+ // Either way, ensure that the device array is reporting correct values
ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array2,
nullptr),
NANOARROW_OK);
EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7);
EXPECT_EQ(memcmp(device_array_view.array_view.buffer_views[2].data.data,
"abcdefg", 7),
0);
- // Copy shouldn't be required to the CPU
+ // Copy shouldn't be required back to the CPU
ASSERT_EQ(ArrowDeviceArrayMoveToDevice(&device_array2, cpu, &device_array),
NANOARROW_OK);
ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array,
nullptr),