This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new fda87c5 feat(python): Add ArrowDeviceArray extension to Python
bindings (#313)
fda87c5 is described below
commit fda87c53fbe8acd19ebdb958806232b5551447a1
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri Nov 17 16:33:13 2023 -0400
feat(python): Add ArrowDeviceArray extension to Python bindings (#313)
This PR adds basic support for wrapping the `ArrowDeviceArray` in the
nanoarrow Python bindings. You have to try pretty hard to get anything
that isn't a CPU array here, but 99% of this is just to get a `repr()`
and make the device array easier to debug/understand. In the future, the
CUDA and Metal implementations could live in separate Python packages as
well.
```python
import nanoarrow.device
import pyarrow as pa
nanoarrow.device.device_array(
pa.record_batch([pa.array([1, 2, 3])], ["col"])
)
```
```
<nanoarrow.device.DeviceArray>
- device_type: 1
- device_id: 0
- array: <nanoarrow.Array struct>
- length: 3
- offset: 0
- null_count: 0
- buffers: (0,)
- dictionary: NULL
- children[1]:
'col': <nanoarrow.Array int64>
- length: 3
- offset: 0
- null_count: 0
- buffers: (0, 5698482274496)
- dictionary: NULL
- children[0]:
```
Most of the code changes here are to add the appropriate reprs for the
`Array` and `Schema` objects. These should work for both CPU and Device
flavours of the C Data interface.
@jorisvandenbossche the reprs should make your
`__arrow_c_array__` / `__arrow_c_schema__` testing easier!
---------
Co-authored-by: Joris Van den Bossche <[email protected]>
---
python/.gitignore | 1 +
python/bootstrap.py | 25 +++-
python/setup.py | 25 ++--
python/src/nanoarrow/__init__.py | 2 +-
python/src/nanoarrow/_lib.pyx | 187 ++++++++++++++++++++----
python/src/nanoarrow/_lib_utils.py | 98 +++++++++++++
python/src/nanoarrow/{__init__.py => device.py} | 14 +-
python/src/nanoarrow/lib.py | 9 +-
python/src/nanoarrow/nanoarrow_device_c.pxd | 85 +++++++++++
python/tests/test_capsules.py | 9 +-
python/{.gitignore => tests/test_device.py} | 60 +++-----
python/tests/test_nanoarrow.py | 61 ++++++--
12 files changed, 487 insertions(+), 89 deletions(-)
diff --git a/python/.gitignore b/python/.gitignore
index cd267c1..d30e198 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -18,6 +18,7 @@
src/nanoarrow/nanoarrow.c
src/nanoarrow/nanoarrow.h
+src/nanoarrow/nanoarrow_device.h
src/nanoarrow/nanoarrow_c.pxd
src/nanoarrow/*.c
diff --git a/python/bootstrap.py b/python/bootstrap.py
index e38f6f9..9e54cb7 100644
--- a/python/bootstrap.py
+++ b/python/bootstrap.py
@@ -159,7 +159,19 @@ def copy_or_generate_nanoarrow_c():
maybe_nanoarrow_h = os.path.join(this_dir, "src/nanoarrow/nanoarrow.h")
maybe_nanoarrow_c = os.path.join(this_dir, "src/nanoarrow/nanoarrow.c")
- for f in (maybe_nanoarrow_c, maybe_nanoarrow_h):
+ maybe_nanoarrow_device_h = os.path.join(
+ this_dir, "src/nanoarrow/nanoarrow_device.h"
+ )
+ maybe_nanoarrow_device_c = os.path.join(
+ this_dir, "src/nanoarrow/nanoarrow_device.c"
+ )
+
+ for f in (
+ maybe_nanoarrow_c,
+ maybe_nanoarrow_h,
+ maybe_nanoarrow_device_h,
+ maybe_nanoarrow_device_c,
+ ):
if os.path.exists(f):
os.unlink(f)
@@ -170,6 +182,17 @@ def copy_or_generate_nanoarrow_c():
has_cmake = os.system("cmake --version") == 0
build_dir = os.path.join(this_dir, "_cmake")
+ if is_in_nanoarrow_repo:
+ device_ext_src = os.path.join(
+ source_dir, "extensions/nanoarrow_device/src/nanoarrow"
+ )
+ shutil.copyfile(
+ os.path.join(device_ext_src, "nanoarrow_device.h"),
maybe_nanoarrow_device_h
+ )
+ shutil.copyfile(
+ os.path.join(device_ext_src, "nanoarrow_device.c"),
maybe_nanoarrow_device_c
+ )
+
if has_cmake and is_cmake_dir and is_in_nanoarrow_repo:
try:
os.mkdir(build_dir)
diff --git a/python/setup.py b/python/setup.py
index eb4dae3..f99d999 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -33,14 +33,18 @@ if os.path.exists(bootstrap_py):
# Set some extra flags for compiling with coverage support
-if os.getenv("NANOARROW_PYTHON_COVERAGE") == "1":
- coverage_compile_args = ["--coverage"]
- coverage_link_args = ["--coverage"]
- coverage_define_macros = [("CYTHON_TRACE", 1)]
+if os.getenv("NANOARROW_COVERAGE") == "1":
+ extra_compile_args = ["--coverage"]
+ extra_link_args = ["--coverage"]
+ extra_define_macros = [("CYTHON_TRACE", 1)]
+elif os.getenv("NANOARROW_DEBUG_EXTENSION") == "1":
+ extra_compile_args = ["-g", "-O0"]
+ extra_link_args = []
+ extra_define_macros = []
else:
- coverage_compile_args = []
- coverage_link_args = []
- coverage_define_macros = []
+ extra_compile_args = []
+ extra_link_args = []
+ extra_define_macros = []
setup(
ext_modules=[
@@ -51,10 +55,11 @@ setup(
sources=[
"src/nanoarrow/_lib.pyx",
"src/nanoarrow/nanoarrow.c",
+ "src/nanoarrow/nanoarrow_device.c",
],
- extra_compile_args=coverage_compile_args,
- extra_link_args=coverage_link_args,
- define_macros=coverage_define_macros,
+ extra_compile_args=extra_compile_args,
+ extra_link_args=extra_link_args,
+ define_macros=extra_define_macros,
)
]
)
diff --git a/python/src/nanoarrow/__init__.py b/python/src/nanoarrow/__init__.py
index 1cde540..46204d3 100644
--- a/python/src/nanoarrow/__init__.py
+++ b/python/src/nanoarrow/__init__.py
@@ -16,4 +16,4 @@
# under the License.
from ._lib import Array, ArrayStream, ArrayView, Schema, c_version # noqa:
F401
-from .lib import array, array_stream, schema # noqa: F401
+from .lib import array, array_stream, schema, array_view # noqa: F401
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 61a53e2..e1e8bb9 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -33,7 +33,9 @@ from cpython.bytes cimport PyBytes_FromStringAndSize
from cpython.pycapsule cimport PyCapsule_GetPointer
from cpython cimport Py_buffer
from nanoarrow_c cimport *
+from nanoarrow_device_c cimport *
+from nanoarrow._lib_utils import array_repr, device_array_repr, schema_repr,
device_repr
def c_version():
"""Return the nanoarrow C library version string
@@ -230,18 +232,21 @@ cdef class Schema:
if self._ptr.release == NULL:
raise RuntimeError("schema is released")
- def __repr__(self):
- cdef int64_t n_chars = ArrowSchemaToString(self._ptr, NULL, 0, True)
+ def _to_string(self, recursive=False):
+ cdef int64_t n_chars = ArrowSchemaToString(self._ptr, NULL, 0,
recursive)
cdef char* out = <char*>PyMem_Malloc(n_chars + 1)
if not out:
raise MemoryError()
- ArrowSchemaToString(self._ptr, out, n_chars + 1, True)
+ ArrowSchemaToString(self._ptr, out, n_chars + 1, recursive)
out_str = out.decode("UTF-8")
PyMem_Free(out)
return out_str
+ def __repr__(self):
+ return schema_repr(self)
+
@property
def format(self):
self._assert_valid()
@@ -416,8 +421,9 @@ cdef class Array:
optional reference to a Schema that can be used to safely deserialize
the content. These objects are usually created using `nanoarrow.array()`.
This Python wrapper allows access to array fields but does not
- automatically deserialize their content: use `.view()` to validate and
- deserialize the content into a more easily inspectable object.
+ automatically deserialize their content: use `nanoarrow.array_view()`
+ to validate and deserialize the content into a more easily inspectable
+ object.
Examples
--------
@@ -430,7 +436,7 @@ cdef class Array:
4
>>> array.null_count
1
- >>> array_view = array.view()
+ >>> array_view = na.array_view(array)
"""
cdef object _base
cdef ArrowArray* _ptr
@@ -519,20 +525,8 @@ cdef class Array:
else:
return None
- def view(self):
- cdef ArrayViewHolder holder = ArrayViewHolder()
-
- cdef Error error = Error()
- cdef int result = ArrowArrayViewInitFromSchema(&holder.c_array_view,
- self._schema._ptr,
&error.c_error)
- if result != NANOARROW_OK:
- error.raise_message("ArrowArrayViewInitFromSchema()", result)
-
- result = ArrowArrayViewSetArray(&holder.c_array_view, self._ptr,
&error.c_error)
- if result != NANOARROW_OK:
- error.raise_message("ArrowArrayViewSetArray()", result)
-
- return ArrayView(holder, holder._addr(), self._schema, self)
+ def __repr__(self):
+ return array_repr(self)
cdef class ArrayView:
@@ -551,7 +545,8 @@ cdef class ArrayView:
>>> import pyarrow as pa
>>> import numpy as np
>>> import nanoarrow as na
- >>> array_view = na.array(pa.array(["one", "two", "three", None])).view()
+ >>> array = na.array(pa.array(["one", "two", "three", None]))
+ >>> array_view = na.array_view(array)
>>> np.array(array_view.buffers[1])
array([ 0, 3, 6, 11, 11], dtype=int32)
>>> np.array(array_view.buffers[2])
@@ -560,6 +555,7 @@ cdef class ArrayView:
"""
cdef object _base
cdef ArrowArrayView* _ptr
+ cdef ArrowDevice* _device
cdef Schema _schema
cdef object _base_buffer
@@ -568,6 +564,7 @@ cdef class ArrayView:
self._ptr = <ArrowArrayView*>addr
self._schema = schema
self._base_buffer = base_buffer
+ self._device = ArrowDeviceCpu()
@property
def length(self):
@@ -605,6 +602,26 @@ cdef class ArrayView:
def schema(self):
return self._schema
+ def _assert_cpu(self):
+ if self._device.device_type != ARROW_DEVICE_CPU:
+ raise RuntimeError("ArrayView is not representing a CPU device")
+
+ @staticmethod
+ def from_cpu_array(Array array):
+ cdef ArrayViewHolder holder = ArrayViewHolder()
+
+ cdef Error error = Error()
+ cdef int result = ArrowArrayViewInitFromSchema(&holder.c_array_view,
+ array._schema._ptr,
&error.c_error)
+ if result != NANOARROW_OK:
+ error.raise_message("ArrowArrayViewInitFromSchema()", result)
+
+ result = ArrowArrayViewSetArray(&holder.c_array_view, array._ptr,
&error.c_error)
+ if result != NANOARROW_OK:
+ error.raise_message("ArrowArrayViewSetArray()", result)
+
+ return ArrayView(holder, holder._addr(), array._schema, array)
+
cdef class SchemaChildren:
"""Wrapper for a lazily-resolved list of Schema children
@@ -706,13 +723,16 @@ cdef class ArrayViewChildren:
k = int(k)
if k < 0 or k >= self._length:
raise IndexError(f"{k} out of range [0, {self._length})")
- return ArrayView(
+ cdef ArrayView child = ArrayView(
self._parent,
self._child_addr(k),
self._parent._schema.children[k],
None
)
+ child._device = self._parent._device
+ return child
+
cdef _child_addr(self, int64_t i):
cdef ArrowArrayView** children = self._parent._ptr.children
cdef ArrowArrayView* child = children[i]
@@ -731,17 +751,19 @@ cdef class BufferView:
cdef ArrowBufferView* _ptr
cdef ArrowBufferType _buffer_type
cdef ArrowType _buffer_data_type
+ cdef ArrowDevice* _device
cdef Py_ssize_t _element_size_bits
cdef Py_ssize_t _shape
cdef Py_ssize_t _strides
def __cinit__(self, object base, uintptr_t addr,
ArrowBufferType buffer_type, ArrowType buffer_data_type,
- Py_ssize_t element_size_bits):
+ Py_ssize_t element_size_bits, uintptr_t device):
self._base = base
self._ptr = <ArrowBufferView*>addr
self._buffer_type = buffer_type
self._buffer_data_type = buffer_data_type
+ self._device = <ArrowDevice*>device
self._element_size_bits = element_size_bits
self._strides = self._item_size()
self._shape = self._ptr.size_bytes // self._strides
@@ -784,6 +806,9 @@ cdef class BufferView:
return "B"
def __getbuffer__(self, Py_buffer *buffer, int flags):
+ if self._device.device_type != ARROW_DEVICE_CPU:
+ raise RuntimeError("nanoarrow.BufferView is not a CPU array")
+
buffer.buf = <void*>self._ptr.data.data
buffer.format = self._get_format()
buffer.internal = NULL
@@ -830,7 +855,8 @@ cdef class ArrayViewBuffers:
<uintptr_t>buffer_view,
self._array_view._ptr.layout.buffer_type[k],
self._array_view._ptr.layout.buffer_data_type[k],
- self._array_view._ptr.layout.element_size_bits[k]
+ self._array_view._ptr.layout.element_size_bits[k],
+ <uintptr_t>self._array_view._device
)
@@ -851,7 +877,20 @@ cdef class ArrayStream:
>>> pa_reader = pa.RecordBatchReader.from_batches(pa_batch.schema,
[pa_batch])
>>> array_stream = na.array_stream(pa_reader)
>>> array_stream.get_schema()
- struct<col1: int32>
+ <nanoarrow.Schema struct>
+ - format: '+s'
+ - name: ''
+ - flags: 0
+ - metadata: NULL
+ - dictionary: NULL
+ - children[1]:
+ 'col1': <nanoarrow.Schema int32>
+ - format: 'i'
+ - name: 'col1'
+ - flags: 2
+ - metadata: NULL
+ - dictionary: NULL
+ - children[0]:
>>> array_stream.get_next().length
3
>>> array_stream.get_next() is None
@@ -964,3 +1003,101 @@ cdef class ArrayStream:
def __next__(self):
return self.get_next()
+
+ @staticmethod
+ def allocate():
+ base = ArrayStreamHolder()
+ return ArrayStream(base, base._addr())
+
+
+cdef class DeviceArrayHolder:
+ """Memory holder for an ArrowDeviceArray
+
+ This class is responsible for the lifecycle of the ArrowDeviceArray
+ whose memory it is responsible. When this object is deleted,
+ a non-NULL release callback is invoked.
+ """
+ cdef ArrowDeviceArray c_array
+
+ def __cinit__(self):
+ self.c_array.array.release = NULL
+
+ def __dealloc__(self):
+ if self.c_array.array.release != NULL:
+ self.c_array.array.release(&self.c_array.array)
+
+ def _addr(self):
+ return <uintptr_t>&self.c_array
+
+cdef class Device:
+ """ArrowDevice wrapper
+
+ The ArrowDevice structure is a nanoarrow internal struct (i.e.,
+ not ABI stable) that contains callbacks for device operations
+ beyond its type and identifier (e.g., copy buffers to or from
+ a device).
+ """
+ cdef object _base
+ cdef ArrowDevice* _ptr
+
+ def __cinit__(self, object base, uintptr_t addr):
+ self._base = base,
+ self._ptr = <ArrowDevice*>addr
+
+ def _array_init(self, uintptr_t array_addr, Schema schema):
+ cdef ArrowArray* array_ptr = <ArrowArray*>array_addr
+ cdef DeviceArrayHolder holder = DeviceArrayHolder()
+ cdef int result = ArrowDeviceArrayInit(self._ptr, &holder.c_array,
array_ptr)
+ if result != NANOARROW_OK:
+ Error.raise_error("ArrowDevice::init_array", result)
+
+ return DeviceArray(holder, holder._addr(), schema)
+
+ def __repr__(self):
+ return device_repr(self)
+
+ @property
+ def device_type(self):
+ return self._ptr.device_type
+
+ @property
+ def device_id(self):
+ return self._ptr.device_id
+
+ @staticmethod
+ def resolve(ArrowDeviceType device_type, int64_t device_id):
+ if device_type == ARROW_DEVICE_CPU:
+ return Device.cpu()
+ else:
+ raise ValueError(f"Device not found for type
{device_type}/{device_id}")
+
+ @staticmethod
+ def cpu():
+ # The CPU device is statically allocated (so base is None)
+ return Device(None, <uintptr_t>ArrowDeviceCpu())
+
+
+cdef class DeviceArray:
+ cdef object _base
+ cdef ArrowDeviceArray* _ptr
+ cdef Schema _schema
+
+ def __cinit__(self, object base, uintptr_t addr, Schema schema):
+ self._base = base
+ self._ptr = <ArrowDeviceArray*>addr
+ self._schema = schema
+
+ @property
+ def device_type(self):
+ return self._ptr.device_type
+
+ @property
+ def device_id(self):
+ return self._ptr.device_id
+
+ @property
+ def array(self):
+ return Array(self, <uintptr_t>&self._ptr.array, self._schema)
+
+ def __repr__(self):
+ return device_array_repr(self)
diff --git a/python/src/nanoarrow/_lib_utils.py
b/python/src/nanoarrow/_lib_utils.py
new file mode 100644
index 0000000..abbd1fc
--- /dev/null
+++ b/python/src/nanoarrow/_lib_utils.py
@@ -0,0 +1,98 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# The functions here are imported in _lib.pyx. They're defined here
+# instead of there to make it easier to iterate (no need to rebuild
+# after editing when working with an editable installation)
+
+
+def schema_repr(schema, indent=0):
+ indent_str = " " * indent
+ if schema._addr() == 0:
+ return "<NULL nanoarrow.Schema>"
+ elif not schema.is_valid():
+ return "<released nanoarrow.Schema>"
+
+ lines = [f"<nanoarrow.Schema {schema._to_string()}>"]
+
+ for attr in ("format", "name", "flags"):
+ attr_repr = repr(getattr(schema, attr))
+ lines.append(f"{indent_str}- {attr}: {attr_repr}")
+
+ metadata = schema.metadata
+ if schema.metadata is None:
+ lines.append(f"{indent_str}- metadata: NULL")
+ else:
+ lines.append(f"{indent_str}- metadata:")
+ for key, value in metadata:
+ lines.append(f"{indent_str} - {repr(key)}: {repr(value)}")
+
+ if schema.dictionary:
+ dictionary_repr = schema_repr(schema.dictionary, indent=indent + 2)
+ lines.append(f"{indent_str}- dictionary: {dictionary_repr}")
+ else:
+ lines.append(f"{indent_str}- dictionary: NULL")
+
+ children = schema.children
+ lines.append(f"{indent_str}- children[{len(children)}]:")
+ for child in children:
+ child_repr = schema_repr(child, indent=indent + 4)
+ lines.append(f"{indent_str} {repr(child.name)}: {child_repr}")
+
+ return "\n".join(lines)
+
+
+def array_repr(array, indent=0):
+ indent_str = " " * indent
+ if array._addr() == 0:
+ return "<NULL nanoarrow.Array>"
+ elif not array.is_valid():
+ return "<released nanoarrow.Array>"
+
+ lines = [f"<nanoarrow.Array {array.schema._to_string()}>"]
+ for attr in ("length", "offset", "null_count", "buffers"):
+ attr_repr = repr(getattr(array, attr))
+ lines.append(f"{indent_str}- {attr}: {attr_repr}")
+
+ if array.dictionary:
+ dictionary_repr = array_repr(array.dictionary, indent=indent + 2)
+ lines.append(f"{indent_str}- dictionary: {dictionary_repr}")
+ else:
+ lines.append(f"{indent_str}- dictionary: NULL")
+
+ children = array.children
+ lines.append(f"{indent_str}- children[{len(children)}]:")
+ for child in children:
+ child_repr = array_repr(child, indent=indent + 4)
+ lines.append(f"{indent_str} {repr(child.schema.name)}: {child_repr}")
+
+ return "\n".join(lines)
+
+
+def device_array_repr(device_array):
+ title_line = "<nanoarrow.device.DeviceArray>"
+ device_type = f"- device_type: {device_array.device_type}"
+ device_id = f"- device_id: {device_array.device_id}"
+ array = f"- array: {array_repr(device_array.array, indent=2)}"
+ return "\n".join((title_line, device_type, device_id, array))
+
+
+def device_repr(device):
+ title_line = "<nanoarrow.device.Device>"
+ device_type = f"- device_type: {device.device_type}"
+ device_id = f"- device_id: {device.device_id}"
+ return "\n".join([title_line, device_type, device_id])
diff --git a/python/src/nanoarrow/__init__.py b/python/src/nanoarrow/device.py
similarity index 73%
copy from python/src/nanoarrow/__init__.py
copy to python/src/nanoarrow/device.py
index 1cde540..9fe4151 100644
--- a/python/src/nanoarrow/__init__.py
+++ b/python/src/nanoarrow/device.py
@@ -15,5 +15,15 @@
# specific language governing permissions and limitations
# under the License.
-from ._lib import Array, ArrayStream, ArrayView, Schema, c_version # noqa:
F401
-from .lib import array, array_stream, schema # noqa: F401
+from nanoarrow._lib import Device, DeviceArray
+from nanoarrow.lib import array
+
+
+def device_array(obj):
+ if isinstance(obj, DeviceArray):
+ return obj
+
+ # Only CPU for now
+ cpu_array = array(obj)
+
+ return Device.cpu()._array_init(cpu_array._addr(), cpu_array.schema)
diff --git a/python/src/nanoarrow/lib.py b/python/src/nanoarrow/lib.py
index 6625aff..43e91dd 100644
--- a/python/src/nanoarrow/lib.py
+++ b/python/src/nanoarrow/lib.py
@@ -15,7 +15,14 @@
# specific language governing permissions and limitations
# under the License.
-from ._lib import Array, ArrayStream, Schema
+from nanoarrow._lib import Array, ArrayStream, ArrayView, Schema
+
+
+def array_view(obj):
+ if isinstance(obj, ArrayView):
+ return obj
+
+ return ArrayView.from_cpu_array(array(obj))
def schema(obj):
diff --git a/python/src/nanoarrow/nanoarrow_device_c.pxd
b/python/src/nanoarrow/nanoarrow_device_c.pxd
new file mode 100644
index 0000000..f2a65a9
--- /dev/null
+++ b/python/src/nanoarrow/nanoarrow_device_c.pxd
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from libc.stdint cimport int32_t, int64_t
+
+from nanoarrow_c cimport *
+
+cdef extern from "nanoarrow_device.h" nogil:
+
+ ctypedef int32_t ArrowDeviceType
+
+ int32_t ARROW_DEVICE_CPU
+ int32_t ARROW_DEVICE_CUDA
+ int32_t ARROW_DEVICE_CUDA_HOST
+ int32_t ARROW_DEVICE_METAL
+
+ struct ArrowDeviceArray:
+ ArrowArray array
+ int64_t device_id
+ ArrowDeviceType device_type
+ void* sync_event
+
+ struct ArrowDevice:
+ ArrowDeviceType device_type
+ int64_t device_id
+ ArrowErrorCode (*array_init)(ArrowDevice* device,
+ ArrowDeviceArray* device_array,
+ ArrowArray* array)
+ ArrowErrorCode (*array_move)(ArrowDevice* device_src,
+ ArrowDeviceArray* src,
+ ArrowDevice* device_dst,
+ ArrowDeviceArray* dst)
+ ArrowErrorCode (*buffer_init)(ArrowDevice* device_src,
+ ArrowBufferView src,
+ ArrowDevice* device_dst, ArrowBuffer*
dst)
+ ArrowErrorCode (*buffer_move)(ArrowDevice* device_src, ArrowBuffer*
src,
+ ArrowDevice* device_dst, ArrowBuffer*
dst)
+ ArrowErrorCode (*buffer_copy)(ArrowDevice* device_src,
+ ArrowBufferView src,
+ ArrowDevice* device_dst,
+ ArrowBufferView dst)
+ ArrowErrorCode (*synchronize_event)(ArrowDevice* device, void*
sync_event,
+ ArrowError* error)
+ void (*release)(ArrowDevice* device)
+ void* private_data
+
+
+ struct ArrowDeviceArrayView:
+ ArrowDevice* device
+ ArrowArrayView array_view
+
+
+ ArrowErrorCode ArrowDeviceArrayInit(ArrowDevice* device,
+ ArrowDeviceArray* device_array,
+ ArrowArray* array)
+
+ void ArrowDeviceArrayViewInit(ArrowDeviceArrayView* device_array_view)
+
+ ArrowErrorCode ArrowDeviceArrayViewSetArrayMinimal(ArrowDeviceArrayView*
device_array_view,
+ ArrowDeviceArray*
device_array,
+ ArrowError* error)
+
+ ArrowErrorCode ArrowDeviceArrayViewSetArray(ArrowDeviceArrayView*
device_array_view,
+ ArrowDeviceArray* device_array,
+ ArrowError* error)
+
+ ArrowErrorCode ArrowDeviceArrayViewCopy(ArrowDeviceArrayView* src,
+ ArrowDevice* device_dst,
+ ArrowDeviceArray* dst)
+
+ ArrowDevice* ArrowDeviceCpu()
diff --git a/python/tests/test_capsules.py b/python/tests/test_capsules.py
index f42f57e..e14dc8c 100644
--- a/python/tests/test_capsules.py
+++ b/python/tests/test_capsules.py
@@ -52,7 +52,7 @@ def test_schema_import():
# some basic validation
assert schema.is_valid()
assert schema.format == "+s"
- assert str(schema) == "struct<some_name: int32>"
+ assert schema._to_string(recursive=True) == "struct<some_name: int32>"
def test_array_import():
@@ -63,7 +63,7 @@ def test_array_import():
# some basic validation
assert array.is_valid()
assert array.length == 3
- assert str(array.schema) == "int32"
+ assert array.schema._to_string(recursive=True) == "int32"
def test_array_stream_import():
@@ -78,4 +78,7 @@ def test_array_stream_import():
assert array_stream.is_valid()
array = array_stream.get_next()
assert array.length == 3
- assert str(array_stream.get_schema()) == "struct<some_column: int32>"
+ assert (
+ array_stream.get_schema()._to_string(recursive=True)
+ == "struct<some_column: int32>"
+ )
diff --git a/python/.gitignore b/python/tests/test_device.py
similarity index 58%
copy from python/.gitignore
copy to python/tests/test_device.py
index cd267c1..1426e36 100644
--- a/python/.gitignore
+++ b/python/tests/test_device.py
@@ -1,4 +1,3 @@
-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -16,39 +15,26 @@
# specific language governing permissions and limitations
# under the License.
-src/nanoarrow/nanoarrow.c
-src/nanoarrow/nanoarrow.h
-src/nanoarrow/nanoarrow_c.pxd
-src/nanoarrow/*.c
-
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-pip-wheel-metadata/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# Unit test / coverage reports
-.pytest_cache/
+import pyarrow as pa
+
+from nanoarrow import device
+
+
+def test_cpu_device():
+ cpu = device.Device.cpu()
+ assert cpu.device_type == 1
+ assert cpu.device_id == 0
+ assert "device_type: 1" in repr(cpu)
+
+ cpu = device.Device.resolve(1, 0)
+ assert cpu.device_type == 1
+
+ pa_array = pa.array([1, 2, 3])
+
+ darray = device.device_array(pa_array)
+ assert darray.device_type == 1
+ assert darray.device_id == 0
+ assert darray.array.length == 3
+ assert "device_type: 1" in repr(darray)
+
+ assert device.device_array(darray) is darray
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index da0be6a..816ff14 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -49,13 +49,29 @@ def test_array_helper():
assert isinstance(array, na.Array)
with pytest.raises(TypeError):
- na.schema(None)
+ na.array(None)
+
+
+def test_array_stream_helper():
+ array_stream = na.ArrayStream.allocate()
+ assert na.array_stream(array_stream) is array_stream
+
+ with pytest.raises(TypeError):
+ na.array_stream(None)
+
+
+def test_array_view_helper():
+ array = na.array(pa.array([1, 2, 3]))
+ view = na.array_view(array)
+ assert isinstance(view, na.ArrayView)
+ assert na.array_view(view) is view
def test_schema_basic():
schema = na.Schema.allocate()
assert schema.is_valid() is False
- assert repr(schema) == "[invalid: schema is released]"
+ assert schema._to_string() == "[invalid: schema is released]"
+ assert repr(schema) == "<released nanoarrow.Schema>"
schema = na.schema(pa.schema([pa.field("some_name", pa.int32())]))
@@ -65,7 +81,8 @@ def test_schema_basic():
assert len(schema.children) == 1
assert schema.children[0].format == "i"
assert schema.children[0].name == "some_name"
- assert repr(schema.children[0]) == "int32"
+ assert schema.children[0]._to_string() == "int32"
+ assert "<nanoarrow.Schema int32>" in repr(schema)
assert schema.dictionary is None
with pytest.raises(IndexError):
@@ -76,6 +93,7 @@ def test_schema_dictionary():
schema = na.schema(pa.dictionary(pa.int32(), pa.utf8()))
assert schema.format == "i"
assert schema.dictionary.format == "u"
+ assert "dictionary: <nanoarrow.Schema string" in repr(schema)
def test_schema_metadata():
@@ -87,6 +105,7 @@ def test_schema_metadata():
meta2 = {k: v for k, v in schema.metadata}
assert list(meta2.keys()) == ["key1", "key2"]
assert list(meta2.values()) == [b"value1", b"value2"]
+ assert "'key1': b'value1'" in repr(schema)
def test_schema_view():
@@ -150,6 +169,12 @@ def test_schema_view_extra_params():
assert view.extension_metadata == b"some_metadata"
+def test_array_empty():
+ array = na.Array.allocate(na.Schema.allocate())
+ assert array.is_valid() is False
+ assert repr(array) == "<released nanoarrow.Array>"
+
+
def test_array():
array = na.array(pa.array([1, 2, 3], pa.int32()))
assert array.is_valid() is True
@@ -160,14 +185,30 @@ def test_array():
assert array.buffers[0] == 0
assert len(array.children) == 0
assert array.dictionary is None
+ assert "<nanoarrow.Array int32" in repr(array)
+
+
+def test_array_recursive():
+ array = na.array(pa.record_batch([pa.array([1, 2, 3], pa.int32())],
["col"]))
+ assert len(array.children) == 1
+ assert array.children[0].length == 3
+ assert array.children[0].schema._to_string() == "int32"
+ assert "'col': <nanoarrow.Array int32" in repr(array)
with pytest.raises(IndexError):
array.children[1]
+def test_array_dictionary():
+ array = na.array(pa.array(["a", "b", "b"]).dictionary_encode())
+ assert array.length == 3
+ assert array.dictionary.length == 2
+ assert "dictionary: <nanoarrow.Array string>" in repr(array)
+
+
def test_array_view():
array = na.array(pa.array([1, 2, 3], pa.int32()))
- view = array.view()
+ view = na.array_view(array)
assert view.schema is array.schema
@@ -198,7 +239,7 @@ def test_array_view_recursive():
assert array.children[0].length == 3
assert array.children[0].schema._addr() == array.schema.children[0]._addr()
- view = array.view()
+ view = na.array_view(array)
assert len(view.buffers) == 1
assert len(view.children) == 1
assert view.schema._addr() == array.schema._addr()
@@ -215,7 +256,7 @@ def test_array_view_dictionary():
assert array.schema.format == "i"
assert array.dictionary.schema.format == "u"
- view = array.view()
+ view = na.array_view(array)
assert len(view.buffers) == 2
assert len(view.dictionary.buffers) == 3
@@ -235,14 +276,14 @@ def test_buffers_data():
]
for pa_type, np_type in data_types:
- view = na.array(pa.array([0, 1, 2], pa_type)).view()
+ view = na.array_view(pa.array([0, 1, 2], pa_type))
np.testing.assert_array_equal(
np.array(view.buffers[1]), np.array([0, 1, 2], np_type)
)
def test_buffers_string():
- view = na.array(pa.array(["a", "bc", "def"])).view()
+ view = na.array_view(pa.array(["a", "bc", "def"]))
assert view.buffers[0] is None
np.testing.assert_array_equal(
@@ -254,7 +295,7 @@ def test_buffers_string():
def test_buffers_binary():
- view = na.array(pa.array([b"a", b"bc", b"def"])).view()
+ view = na.array_view(pa.array([b"a", b"bc", b"def"]))
assert view.buffers[0] is None
np.testing.assert_array_equal(
@@ -265,6 +306,8 @@ def test_buffers_binary():
def test_array_stream():
array_stream = na.ArrayStream.allocate()
+ assert na.array_stream(array_stream) is array_stream
+
assert array_stream.is_valid() is False
with pytest.raises(RuntimeError):
array_stream.get_schema()