This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new fda87c5  feat(python): Add ArrowDeviceArray extension to Python 
bindings (#313)
fda87c5 is described below

commit fda87c53fbe8acd19ebdb958806232b5551447a1
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri Nov 17 16:33:13 2023 -0400

    feat(python): Add ArrowDeviceArray extension to Python bindings (#313)
    
    This PR adds basic support for wrapping the `ArrowDeviceArray` in
    nanoarrow Python. You have to try pretty hard to get anything that isn't
    a CPU array here, but 99% of this is just to get a `repr()` and make the
    device array easier to debug/understand. In the future, the CUDA and
    Metal implementations could live in separate Python packages as well.
    
    ```python
    import nanoarrow.device
    import pyarrow as pa
    
    nanoarrow.device.device_array(
        pa.record_batch([pa.array([1, 2, 3])], ["col"])
    )
    ```
    
    ```
    <nanoarrow.device.DeviceArray>
    - device_type: 1
    - device_id: 0
    - array: <nanoarrow.Array struct>
      - length: 3
      - offset: 0
      - null_count: 0
      - buffers: (0,)
      - dictionary: NULL
      - children[1]:
        'col': <nanoarrow.Array int64>
          - length: 3
          - offset: 0
          - null_count: 0
          - buffers: (0, 5698482274496)
          - dictionary: NULL
          - children[0]:
    ```
    
    Most of the code changes here are to add the appropriate reprs for the
    `Array` and `Schema` objects. These should work for both CPU and Device
    flavours of the C Data interface.
    
    @jorisvandenbossche the reprs should make it easier for your
    `__arrow_c_array__` / `__arrow_c_schema__` testing!
    
    ---------
    
    Co-authored-by: Joris Van den Bossche <[email protected]>
---
 python/.gitignore                               |   1 +
 python/bootstrap.py                             |  25 +++-
 python/setup.py                                 |  25 ++--
 python/src/nanoarrow/__init__.py                |   2 +-
 python/src/nanoarrow/_lib.pyx                   | 187 ++++++++++++++++++++----
 python/src/nanoarrow/_lib_utils.py              |  98 +++++++++++++
 python/src/nanoarrow/{__init__.py => device.py} |  14 +-
 python/src/nanoarrow/lib.py                     |   9 +-
 python/src/nanoarrow/nanoarrow_device_c.pxd     |  85 +++++++++++
 python/tests/test_capsules.py                   |   9 +-
 python/{.gitignore => tests/test_device.py}     |  60 +++-----
 python/tests/test_nanoarrow.py                  |  61 ++++++--
 12 files changed, 487 insertions(+), 89 deletions(-)

diff --git a/python/.gitignore b/python/.gitignore
index cd267c1..d30e198 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -18,6 +18,7 @@
 
 src/nanoarrow/nanoarrow.c
 src/nanoarrow/nanoarrow.h
+src/nanoarrow/nanoarrow_device.h
 src/nanoarrow/nanoarrow_c.pxd
 src/nanoarrow/*.c
 
diff --git a/python/bootstrap.py b/python/bootstrap.py
index e38f6f9..9e54cb7 100644
--- a/python/bootstrap.py
+++ b/python/bootstrap.py
@@ -159,7 +159,19 @@ def copy_or_generate_nanoarrow_c():
 
     maybe_nanoarrow_h = os.path.join(this_dir, "src/nanoarrow/nanoarrow.h")
     maybe_nanoarrow_c = os.path.join(this_dir, "src/nanoarrow/nanoarrow.c")
-    for f in (maybe_nanoarrow_c, maybe_nanoarrow_h):
+    maybe_nanoarrow_device_h = os.path.join(
+        this_dir, "src/nanoarrow/nanoarrow_device.h"
+    )
+    maybe_nanoarrow_device_c = os.path.join(
+        this_dir, "src/nanoarrow/nanoarrow_device.c"
+    )
+
+    for f in (
+        maybe_nanoarrow_c,
+        maybe_nanoarrow_h,
+        maybe_nanoarrow_device_h,
+        maybe_nanoarrow_device_c,
+    ):
         if os.path.exists(f):
             os.unlink(f)
 
@@ -170,6 +182,17 @@ def copy_or_generate_nanoarrow_c():
     has_cmake = os.system("cmake --version") == 0
     build_dir = os.path.join(this_dir, "_cmake")
 
+    if is_in_nanoarrow_repo:
+        device_ext_src = os.path.join(
+            source_dir, "extensions/nanoarrow_device/src/nanoarrow"
+        )
+        shutil.copyfile(
+            os.path.join(device_ext_src, "nanoarrow_device.h"), 
maybe_nanoarrow_device_h
+        )
+        shutil.copyfile(
+            os.path.join(device_ext_src, "nanoarrow_device.c"), 
maybe_nanoarrow_device_c
+        )
+
     if has_cmake and is_cmake_dir and is_in_nanoarrow_repo:
         try:
             os.mkdir(build_dir)
diff --git a/python/setup.py b/python/setup.py
index eb4dae3..f99d999 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -33,14 +33,18 @@ if os.path.exists(bootstrap_py):
 
 
 # Set some extra flags for compiling with coverage support
-if os.getenv("NANOARROW_PYTHON_COVERAGE") == "1":
-    coverage_compile_args = ["--coverage"]
-    coverage_link_args = ["--coverage"]
-    coverage_define_macros = [("CYTHON_TRACE", 1)]
+if os.getenv("NANOARROW_COVERAGE") == "1":
+    extra_compile_args = ["--coverage"]
+    extra_link_args = ["--coverage"]
+    extra_define_macros = [("CYTHON_TRACE", 1)]
+elif os.getenv("NANOARROW_DEBUG_EXTENSION") == "1":
+    extra_compile_args = ["-g", "-O0"]
+    extra_link_args = []
+    extra_define_macros = []
 else:
-    coverage_compile_args = []
-    coverage_link_args = []
-    coverage_define_macros = []
+    extra_compile_args = []
+    extra_link_args = []
+    extra_define_macros = []
 
 setup(
     ext_modules=[
@@ -51,10 +55,11 @@ setup(
             sources=[
                 "src/nanoarrow/_lib.pyx",
                 "src/nanoarrow/nanoarrow.c",
+                "src/nanoarrow/nanoarrow_device.c",
             ],
-            extra_compile_args=coverage_compile_args,
-            extra_link_args=coverage_link_args,
-            define_macros=coverage_define_macros,
+            extra_compile_args=extra_compile_args,
+            extra_link_args=extra_link_args,
+            define_macros=extra_define_macros,
         )
     ]
 )
diff --git a/python/src/nanoarrow/__init__.py b/python/src/nanoarrow/__init__.py
index 1cde540..46204d3 100644
--- a/python/src/nanoarrow/__init__.py
+++ b/python/src/nanoarrow/__init__.py
@@ -16,4 +16,4 @@
 # under the License.
 
 from ._lib import Array, ArrayStream, ArrayView, Schema, c_version  # noqa: 
F401
-from .lib import array, array_stream, schema  # noqa: F401
+from .lib import array, array_stream, schema, array_view  # noqa: F401
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 61a53e2..e1e8bb9 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -33,7 +33,9 @@ from cpython.bytes cimport PyBytes_FromStringAndSize
 from cpython.pycapsule cimport PyCapsule_GetPointer
 from cpython cimport Py_buffer
 from nanoarrow_c cimport *
+from nanoarrow_device_c cimport *
 
+from nanoarrow._lib_utils import array_repr, device_array_repr, schema_repr, 
device_repr
 
 def c_version():
     """Return the nanoarrow C library version string
@@ -230,18 +232,21 @@ cdef class Schema:
         if self._ptr.release == NULL:
             raise RuntimeError("schema is released")
 
-    def __repr__(self):
-        cdef int64_t n_chars = ArrowSchemaToString(self._ptr, NULL, 0, True)
+    def _to_string(self, recursive=False):
+        cdef int64_t n_chars = ArrowSchemaToString(self._ptr, NULL, 0, 
recursive)
         cdef char* out = <char*>PyMem_Malloc(n_chars + 1)
         if not out:
             raise MemoryError()
 
-        ArrowSchemaToString(self._ptr, out, n_chars + 1, True)
+        ArrowSchemaToString(self._ptr, out, n_chars + 1, recursive)
         out_str = out.decode("UTF-8")
         PyMem_Free(out)
 
         return out_str
 
+    def __repr__(self):
+        return schema_repr(self)
+
     @property
     def format(self):
         self._assert_valid()
@@ -416,8 +421,9 @@ cdef class Array:
     optional reference to a Schema that can be used to safely deserialize
     the content. These objects are usually created using `nanoarrow.array()`.
     This Python wrapper allows access to array fields but does not
-    automatically deserialize their content: use `.view()` to validate and
-    deserialize the content into a more easily inspectable object.
+    automatically deserialize their content: use `nanoarrow.array_view()`
+    to validate and deserialize the content into a more easily inspectable
+    object.
 
     Examples
     --------
@@ -430,7 +436,7 @@ cdef class Array:
     4
     >>> array.null_count
     1
-    >>> array_view = array.view()
+    >>> array_view = na.array_view(array)
     """
     cdef object _base
     cdef ArrowArray* _ptr
@@ -519,20 +525,8 @@ cdef class Array:
         else:
             return None
 
-    def view(self):
-        cdef ArrayViewHolder holder = ArrayViewHolder()
-
-        cdef Error error = Error()
-        cdef int result = ArrowArrayViewInitFromSchema(&holder.c_array_view,
-                                                       self._schema._ptr, 
&error.c_error)
-        if result != NANOARROW_OK:
-            error.raise_message("ArrowArrayViewInitFromSchema()", result)
-
-        result = ArrowArrayViewSetArray(&holder.c_array_view, self._ptr, 
&error.c_error)
-        if result != NANOARROW_OK:
-            error.raise_message("ArrowArrayViewSetArray()", result)
-
-        return ArrayView(holder, holder._addr(), self._schema, self)
+    def __repr__(self):
+        return array_repr(self)
 
 
 cdef class ArrayView:
@@ -551,7 +545,8 @@ cdef class ArrayView:
     >>> import pyarrow as pa
     >>> import numpy as np
     >>> import nanoarrow as na
-    >>> array_view = na.array(pa.array(["one", "two", "three", None])).view()
+    >>> array = na.array(pa.array(["one", "two", "three", None]))
+    >>> array_view = na.array_view(array)
     >>> np.array(array_view.buffers[1])
     array([ 0,  3,  6, 11, 11], dtype=int32)
     >>> np.array(array_view.buffers[2])
@@ -560,6 +555,7 @@ cdef class ArrayView:
     """
     cdef object _base
     cdef ArrowArrayView* _ptr
+    cdef ArrowDevice* _device
     cdef Schema _schema
     cdef object _base_buffer
 
@@ -568,6 +564,7 @@ cdef class ArrayView:
         self._ptr = <ArrowArrayView*>addr
         self._schema = schema
         self._base_buffer = base_buffer
+        self._device = ArrowDeviceCpu()
 
     @property
     def length(self):
@@ -605,6 +602,26 @@ cdef class ArrayView:
     def schema(self):
         return self._schema
 
+    def _assert_cpu(self):
+        if self._device.device_type != ARROW_DEVICE_CPU:
+            raise RuntimeError("ArrayView is not representing a CPU device")
+
+    @staticmethod
+    def from_cpu_array(Array array):
+        cdef ArrayViewHolder holder = ArrayViewHolder()
+
+        cdef Error error = Error()
+        cdef int result = ArrowArrayViewInitFromSchema(&holder.c_array_view,
+                                                       array._schema._ptr, 
&error.c_error)
+        if result != NANOARROW_OK:
+            error.raise_message("ArrowArrayViewInitFromSchema()", result)
+
+        result = ArrowArrayViewSetArray(&holder.c_array_view, array._ptr, 
&error.c_error)
+        if result != NANOARROW_OK:
+            error.raise_message("ArrowArrayViewSetArray()", result)
+
+        return ArrayView(holder, holder._addr(), array._schema, array)
+
 
 cdef class SchemaChildren:
     """Wrapper for a lazily-resolved list of Schema children
@@ -706,13 +723,16 @@ cdef class ArrayViewChildren:
         k = int(k)
         if k < 0 or k >= self._length:
             raise IndexError(f"{k} out of range [0, {self._length})")
-        return ArrayView(
+        cdef ArrayView child = ArrayView(
             self._parent,
             self._child_addr(k),
             self._parent._schema.children[k],
             None
         )
 
+        child._device = self._parent._device
+        return child
+
     cdef _child_addr(self, int64_t i):
         cdef ArrowArrayView** children = self._parent._ptr.children
         cdef ArrowArrayView* child = children[i]
@@ -731,17 +751,19 @@ cdef class BufferView:
     cdef ArrowBufferView* _ptr
     cdef ArrowBufferType _buffer_type
     cdef ArrowType _buffer_data_type
+    cdef ArrowDevice* _device
     cdef Py_ssize_t _element_size_bits
     cdef Py_ssize_t _shape
     cdef Py_ssize_t _strides
 
     def __cinit__(self, object base, uintptr_t addr,
                  ArrowBufferType buffer_type, ArrowType buffer_data_type,
-                 Py_ssize_t element_size_bits):
+                 Py_ssize_t element_size_bits, uintptr_t device):
         self._base = base
         self._ptr = <ArrowBufferView*>addr
         self._buffer_type = buffer_type
         self._buffer_data_type = buffer_data_type
+        self._device = <ArrowDevice*>device
         self._element_size_bits = element_size_bits
         self._strides = self._item_size()
         self._shape = self._ptr.size_bytes // self._strides
@@ -784,6 +806,9 @@ cdef class BufferView:
             return "B"
 
     def __getbuffer__(self, Py_buffer *buffer, int flags):
+        if self._device.device_type != ARROW_DEVICE_CPU:
+            raise RuntimeError("nanoarrow.BufferView is not a CPU array")
+
         buffer.buf = <void*>self._ptr.data.data
         buffer.format = self._get_format()
         buffer.internal = NULL
@@ -830,7 +855,8 @@ cdef class ArrayViewBuffers:
             <uintptr_t>buffer_view,
             self._array_view._ptr.layout.buffer_type[k],
             self._array_view._ptr.layout.buffer_data_type[k],
-            self._array_view._ptr.layout.element_size_bits[k]
+            self._array_view._ptr.layout.element_size_bits[k],
+            <uintptr_t>self._array_view._device
         )
 
 
@@ -851,7 +877,20 @@ cdef class ArrayStream:
     >>> pa_reader = pa.RecordBatchReader.from_batches(pa_batch.schema, 
[pa_batch])
     >>> array_stream = na.array_stream(pa_reader)
     >>> array_stream.get_schema()
-    struct<col1: int32>
+    <nanoarrow.Schema struct>
+    - format: '+s'
+    - name: ''
+    - flags: 0
+    - metadata: NULL
+    - dictionary: NULL
+    - children[1]:
+      'col1': <nanoarrow.Schema int32>
+        - format: 'i'
+        - name: 'col1'
+        - flags: 2
+        - metadata: NULL
+        - dictionary: NULL
+        - children[0]:
     >>> array_stream.get_next().length
     3
     >>> array_stream.get_next() is None
@@ -964,3 +1003,101 @@ cdef class ArrayStream:
 
     def __next__(self):
         return self.get_next()
+
+    @staticmethod
+    def allocate():
+        base = ArrayStreamHolder()
+        return ArrayStream(base, base._addr())
+
+
+cdef class DeviceArrayHolder:
+    """Memory holder for an ArrowDeviceArray
+
+    This class is responsible for the lifecycle of the ArrowDeviceArray
+    whose memory it owns. When this object is deleted, the array's
+    release callback is invoked if it is non-NULL.
+    """
+    cdef ArrowDeviceArray c_array
+
+    def __cinit__(self):
+        self.c_array.array.release = NULL
+
+    def __dealloc__(self):
+        if self.c_array.array.release != NULL:
+          self.c_array.array.release(&self.c_array.array)
+
+    def _addr(self):
+        return <uintptr_t>&self.c_array
+
+cdef class Device:
+    """ArrowDevice wrapper
+
+    The ArrowDevice structure is a nanoarrow internal struct (i.e.,
+    not ABI stable) that contains callbacks for device operations
+    beyond its type and identifier (e.g., copy buffers to or from
+    a device).
+    """
+    cdef object _base
+    cdef ArrowDevice* _ptr
+
+    def __cinit__(self, object base, uintptr_t addr):
+        self._base = base  # not "base,": the trailing comma stored a 1-tuple
+        self._ptr = <ArrowDevice*>addr
+
+    def _array_init(self, uintptr_t array_addr, Schema schema):
+        cdef ArrowArray* array_ptr = <ArrowArray*>array_addr
+        cdef DeviceArrayHolder holder = DeviceArrayHolder()
+        cdef int result = ArrowDeviceArrayInit(self._ptr, &holder.c_array, array_ptr)
+        if result != NANOARROW_OK:
+            Error.raise_error("ArrowDevice::init_array", result)
+
+        return DeviceArray(holder, holder._addr(), schema)
+
+    def __repr__(self):
+        return device_repr(self)
+
+    @property
+    def device_type(self):
+        return self._ptr.device_type
+
+    @property
+    def device_id(self):
+        return self._ptr.device_id
+
+    @staticmethod
+    def resolve(ArrowDeviceType device_type, int64_t device_id):
+        if device_type == ARROW_DEVICE_CPU:
+            return Device.cpu()
+        else:
+            raise ValueError(f"Device not found for type {device_type}/{device_id}")
+
+    @staticmethod
+    def cpu():
+        # The CPU device is statically allocated (so base is None)
+        return Device(None, <uintptr_t>ArrowDeviceCpu())
+
+
+cdef class DeviceArray:
+    cdef object _base
+    cdef ArrowDeviceArray* _ptr
+    cdef Schema _schema
+
+    def __cinit__(self, object base, uintptr_t addr, Schema schema):
+        self._base = base
+        self._ptr = <ArrowDeviceArray*>addr
+        self._schema = schema
+
+    @property
+    def device_type(self):
+        return self._ptr.device_type
+
+    @property
+    def device_id(self):
+        return self._ptr.device_id
+
+    @property
+    def array(self):
+        return Array(self, <uintptr_t>&self._ptr.array, self._schema)
+
+    def __repr__(self):
+        return device_array_repr(self)
diff --git a/python/src/nanoarrow/_lib_utils.py 
b/python/src/nanoarrow/_lib_utils.py
new file mode 100644
index 0000000..abbd1fc
--- /dev/null
+++ b/python/src/nanoarrow/_lib_utils.py
@@ -0,0 +1,98 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# The functions here are imported in _lib.pyx. They're defined here
+# instead of there to make it easier to iterate (no need to rebuild
+# after editing when working with an editable installation)
+
+
+def schema_repr(schema, indent=0):
+    indent_str = " " * indent
+    if schema._addr() == 0:
+        return "<NULL nanoarrow.Schema>"
+    elif not schema.is_valid():
+        return "<released nanoarrow.Schema>"
+
+    lines = [f"<nanoarrow.Schema {schema._to_string()}>"]
+
+    for attr in ("format", "name", "flags"):
+        attr_repr = repr(getattr(schema, attr))
+        lines.append(f"{indent_str}- {attr}: {attr_repr}")
+
+    metadata = schema.metadata
+    if schema.metadata is None:
+        lines.append(f"{indent_str}- metadata: NULL")
+    else:
+        lines.append(f"{indent_str}- metadata:")
+        for key, value in metadata:
+            lines.append(f"{indent_str}  - {repr(key)}: {repr(value)}")
+
+    if schema.dictionary:
+        dictionary_repr = schema_repr(schema.dictionary, indent=indent + 2)
+        lines.append(f"{indent_str}- dictionary: {dictionary_repr}")
+    else:
+        lines.append(f"{indent_str}- dictionary: NULL")
+
+    children = schema.children
+    lines.append(f"{indent_str}- children[{len(children)}]:")
+    for child in children:
+        child_repr = schema_repr(child, indent=indent + 4)
+        lines.append(f"{indent_str}  {repr(child.name)}: {child_repr}")
+
+    return "\n".join(lines)
+
+
+def array_repr(array, indent=0):
+    indent_str = " " * indent
+    if array._addr() == 0:
+        return "<NULL nanoarrow.Array>"
+    elif not array.is_valid():
+        return "<released nanoarrow.Array>"
+
+    lines = [f"<nanoarrow.Array {array.schema._to_string()}>"]
+    for attr in ("length", "offset", "null_count", "buffers"):
+        attr_repr = repr(getattr(array, attr))
+        lines.append(f"{indent_str}- {attr}: {attr_repr}")
+
+    if array.dictionary:
+        dictionary_repr = array_repr(array.dictionary, indent=indent + 2)
+        lines.append(f"{indent_str}- dictionary: {dictionary_repr}")
+    else:
+        lines.append(f"{indent_str}- dictionary: NULL")
+
+    children = array.children
+    lines.append(f"{indent_str}- children[{len(children)}]:")
+    for child in children:
+        child_repr = array_repr(child, indent=indent + 4)
+        lines.append(f"{indent_str}  {repr(child.schema.name)}: {child_repr}")
+
+    return "\n".join(lines)
+
+
+def device_array_repr(device_array):
+    title_line = "<nanoarrow.device.DeviceArray>"
+    device_type = f"- device_type: {device_array.device_type}"
+    device_id = f"- device_id: {device_array.device_id}"
+    array = f"- array: {array_repr(device_array.array, indent=2)}"
+    return "\n".join((title_line, device_type, device_id, array))
+
+
+def device_repr(device):
+    title_line = "<nanoarrow.device.Device>"
+    device_type = f"- device_type: {device.device_type}"
+    device_id = f"- device_id: {device.device_id}"
+    return "\n".join([title_line, device_type, device_id])
diff --git a/python/src/nanoarrow/__init__.py b/python/src/nanoarrow/device.py
similarity index 73%
copy from python/src/nanoarrow/__init__.py
copy to python/src/nanoarrow/device.py
index 1cde540..9fe4151 100644
--- a/python/src/nanoarrow/__init__.py
+++ b/python/src/nanoarrow/device.py
@@ -15,5 +15,15 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from ._lib import Array, ArrayStream, ArrayView, Schema, c_version  # noqa: 
F401
-from .lib import array, array_stream, schema  # noqa: F401
+from nanoarrow._lib import Device, DeviceArray
+from nanoarrow.lib import array
+
+
+def device_array(obj):
+    if isinstance(obj, DeviceArray):
+        return obj
+
+    # Only CPU for now
+    cpu_array = array(obj)
+
+    return Device.cpu()._array_init(cpu_array._addr(), cpu_array.schema)
diff --git a/python/src/nanoarrow/lib.py b/python/src/nanoarrow/lib.py
index 6625aff..43e91dd 100644
--- a/python/src/nanoarrow/lib.py
+++ b/python/src/nanoarrow/lib.py
@@ -15,7 +15,14 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from ._lib import Array, ArrayStream, Schema
+from nanoarrow._lib import Array, ArrayStream, ArrayView, Schema
+
+
+def array_view(obj):
+    if isinstance(obj, ArrayView):
+        return obj
+
+    return ArrayView.from_cpu_array(array(obj))
 
 
 def schema(obj):
diff --git a/python/src/nanoarrow/nanoarrow_device_c.pxd 
b/python/src/nanoarrow/nanoarrow_device_c.pxd
new file mode 100644
index 0000000..f2a65a9
--- /dev/null
+++ b/python/src/nanoarrow/nanoarrow_device_c.pxd
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from libc.stdint cimport int32_t, int64_t
+
+from nanoarrow_c cimport *
+
+cdef extern from "nanoarrow_device.h" nogil:
+
+    ctypedef int32_t ArrowDeviceType
+
+    int32_t ARROW_DEVICE_CPU
+    int32_t ARROW_DEVICE_CUDA
+    int32_t ARROW_DEVICE_CUDA_HOST
+    int32_t ARROW_DEVICE_METAL
+
+    struct ArrowDeviceArray:
+        ArrowArray array
+        int64_t device_id
+        ArrowDeviceType device_type
+        void* sync_event
+
+    struct ArrowDevice:
+        ArrowDeviceType device_type
+        int64_t device_id
+        ArrowErrorCode (*array_init)(ArrowDevice* device,
+                                     ArrowDeviceArray* device_array,
+                                     ArrowArray* array)
+        ArrowErrorCode (*array_move)(ArrowDevice* device_src,
+                                     ArrowDeviceArray* src,
+                                     ArrowDevice* device_dst,
+                                     ArrowDeviceArray* dst)
+        ArrowErrorCode (*buffer_init)(ArrowDevice* device_src,
+                                      ArrowBufferView src,
+                                      ArrowDevice* device_dst, ArrowBuffer* 
dst)
+        ArrowErrorCode (*buffer_move)(ArrowDevice* device_src, ArrowBuffer* 
src,
+                                      ArrowDevice* device_dst, ArrowBuffer* 
dst)
+        ArrowErrorCode (*buffer_copy)(ArrowDevice* device_src,
+                                      ArrowBufferView src,
+                                      ArrowDevice* device_dst,
+                                      ArrowBufferView dst)
+        ArrowErrorCode (*synchronize_event)(ArrowDevice* device, void* 
sync_event,
+                                            ArrowError* error)
+        void (*release)(ArrowDevice* device)
+        void* private_data
+
+
+    struct ArrowDeviceArrayView:
+        ArrowDevice* device
+        ArrowArrayView array_view
+
+
+    ArrowErrorCode ArrowDeviceArrayInit(ArrowDevice* device,
+                                        ArrowDeviceArray* device_array,
+                                        ArrowArray* array)
+
+    void ArrowDeviceArrayViewInit(ArrowDeviceArrayView* device_array_view)
+
+    ArrowErrorCode ArrowDeviceArrayViewSetArrayMinimal(ArrowDeviceArrayView* 
device_array_view,
+                                                       ArrowDeviceArray* 
device_array,
+                                                       ArrowError* error)
+
+    ArrowErrorCode ArrowDeviceArrayViewSetArray(ArrowDeviceArrayView* 
device_array_view,
+                                                ArrowDeviceArray* device_array,
+                                                ArrowError* error)
+
+    ArrowErrorCode ArrowDeviceArrayViewCopy(ArrowDeviceArrayView* src,
+                                            ArrowDevice* device_dst,
+                                            ArrowDeviceArray* dst)
+
+    ArrowDevice* ArrowDeviceCpu()
diff --git a/python/tests/test_capsules.py b/python/tests/test_capsules.py
index f42f57e..e14dc8c 100644
--- a/python/tests/test_capsules.py
+++ b/python/tests/test_capsules.py
@@ -52,7 +52,7 @@ def test_schema_import():
         # some basic validation
         assert schema.is_valid()
         assert schema.format == "+s"
-        assert str(schema) == "struct<some_name: int32>"
+        assert schema._to_string(recursive=True) == "struct<some_name: int32>"
 
 
 def test_array_import():
@@ -63,7 +63,7 @@ def test_array_import():
         # some basic validation
         assert array.is_valid()
         assert array.length == 3
-        assert str(array.schema) == "int32"
+        assert array.schema._to_string(recursive=True) == "int32"
 
 
 def test_array_stream_import():
@@ -78,4 +78,7 @@ def test_array_stream_import():
         assert array_stream.is_valid()
         array = array_stream.get_next()
         assert array.length == 3
-        assert str(array_stream.get_schema()) == "struct<some_column: int32>"
+        assert (
+            array_stream.get_schema()._to_string(recursive=True)
+            == "struct<some_column: int32>"
+        )
diff --git a/python/.gitignore b/python/tests/test_device.py
similarity index 58%
copy from python/.gitignore
copy to python/tests/test_device.py
index cd267c1..1426e36 100644
--- a/python/.gitignore
+++ b/python/tests/test_device.py
@@ -1,4 +1,3 @@
-
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -16,39 +15,26 @@
 # specific language governing permissions and limitations
 # under the License.
 
-src/nanoarrow/nanoarrow.c
-src/nanoarrow/nanoarrow.h
-src/nanoarrow/nanoarrow_c.pxd
-src/nanoarrow/*.c
-
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-pip-wheel-metadata/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# Unit test / coverage reports
-.pytest_cache/
+import pyarrow as pa
+
+from nanoarrow import device
+
+
+def test_cpu_device():
+    cpu = device.Device.cpu()
+    assert cpu.device_type == 1
+    assert cpu.device_id == 0
+    assert "device_type: 1" in repr(cpu)
+
+    cpu = device.Device.resolve(1, 0)
+    assert cpu.device_type == 1
+
+    pa_array = pa.array([1, 2, 3])
+
+    darray = device.device_array(pa_array)
+    assert darray.device_type == 1
+    assert darray.device_id == 0
+    assert darray.array.length == 3
+    assert "device_type: 1" in repr(darray)
+
+    assert device.device_array(darray) is darray
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index da0be6a..816ff14 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -49,13 +49,29 @@ def test_array_helper():
     assert isinstance(array, na.Array)
 
     with pytest.raises(TypeError):
-        na.schema(None)
+        na.array(None)
+
+
+def test_array_stream_helper():
+    array_stream = na.ArrayStream.allocate()
+    assert na.array_stream(array_stream) is array_stream
+
+    with pytest.raises(TypeError):
+        na.array_stream(None)
+
+
+def test_array_view_helper():
+    array = na.array(pa.array([1, 2, 3]))
+    view = na.array_view(array)
+    assert isinstance(view, na.ArrayView)
+    assert na.array_view(view) is view
 
 
 def test_schema_basic():
     schema = na.Schema.allocate()
     assert schema.is_valid() is False
-    assert repr(schema) == "[invalid: schema is released]"
+    assert schema._to_string() == "[invalid: schema is released]"
+    assert repr(schema) == "<released nanoarrow.Schema>"
 
     schema = na.schema(pa.schema([pa.field("some_name", pa.int32())]))
 
@@ -65,7 +81,8 @@ def test_schema_basic():
     assert len(schema.children) == 1
     assert schema.children[0].format == "i"
     assert schema.children[0].name == "some_name"
-    assert repr(schema.children[0]) == "int32"
+    assert schema.children[0]._to_string() == "int32"
+    assert "<nanoarrow.Schema int32>" in repr(schema)
     assert schema.dictionary is None
 
     with pytest.raises(IndexError):
@@ -76,6 +93,7 @@ def test_schema_dictionary():
     schema = na.schema(pa.dictionary(pa.int32(), pa.utf8()))
     assert schema.format == "i"
     assert schema.dictionary.format == "u"
+    assert "dictionary: <nanoarrow.Schema string" in repr(schema)
 
 
 def test_schema_metadata():
@@ -87,6 +105,7 @@ def test_schema_metadata():
     meta2 = {k: v for k, v in schema.metadata}
     assert list(meta2.keys()) == ["key1", "key2"]
     assert list(meta2.values()) == [b"value1", b"value2"]
+    assert "'key1': b'value1'" in repr(schema)
 
 
 def test_schema_view():
@@ -150,6 +169,12 @@ def test_schema_view_extra_params():
     assert view.extension_metadata == b"some_metadata"
 
 
+def test_array_empty():
+    array = na.Array.allocate(na.Schema.allocate())
+    assert array.is_valid() is False
+    assert repr(array) == "<released nanoarrow.Array>"
+
+
 def test_array():
     array = na.array(pa.array([1, 2, 3], pa.int32()))
     assert array.is_valid() is True
@@ -160,14 +185,30 @@ def test_array():
     assert array.buffers[0] == 0
     assert len(array.children) == 0
     assert array.dictionary is None
+    assert "<nanoarrow.Array int32" in repr(array)
+
+
+def test_array_recursive():
+    array = na.array(pa.record_batch([pa.array([1, 2, 3], pa.int32())], 
["col"]))
+    assert len(array.children) == 1
+    assert array.children[0].length == 3
+    assert array.children[0].schema._to_string() == "int32"
+    assert "'col': <nanoarrow.Array int32" in repr(array)
 
     with pytest.raises(IndexError):
         array.children[1]
 
 
+def test_array_dictionary():
+    array = na.array(pa.array(["a", "b", "b"]).dictionary_encode())
+    assert array.length == 3
+    assert array.dictionary.length == 2
+    assert "dictionary: <nanoarrow.Array string>" in repr(array)
+
+
 def test_array_view():
     array = na.array(pa.array([1, 2, 3], pa.int32()))
-    view = array.view()
+    view = na.array_view(array)
 
     assert view.schema is array.schema
 
@@ -198,7 +239,7 @@ def test_array_view_recursive():
     assert array.children[0].length == 3
     assert array.children[0].schema._addr() == array.schema.children[0]._addr()
 
-    view = array.view()
+    view = na.array_view(array)
     assert len(view.buffers) == 1
     assert len(view.children) == 1
     assert view.schema._addr() == array.schema._addr()
@@ -215,7 +256,7 @@ def test_array_view_dictionary():
     assert array.schema.format == "i"
     assert array.dictionary.schema.format == "u"
 
-    view = array.view()
+    view = na.array_view(array)
     assert len(view.buffers) == 2
     assert len(view.dictionary.buffers) == 3
 
@@ -235,14 +276,14 @@ def test_buffers_data():
     ]
 
     for pa_type, np_type in data_types:
-        view = na.array(pa.array([0, 1, 2], pa_type)).view()
+        view = na.array_view(pa.array([0, 1, 2], pa_type))
         np.testing.assert_array_equal(
             np.array(view.buffers[1]), np.array([0, 1, 2], np_type)
         )
 
 
 def test_buffers_string():
-    view = na.array(pa.array(["a", "bc", "def"])).view()
+    view = na.array_view(pa.array(["a", "bc", "def"]))
 
     assert view.buffers[0] is None
     np.testing.assert_array_equal(
@@ -254,7 +295,7 @@ def test_buffers_string():
 
 
 def test_buffers_binary():
-    view = na.array(pa.array([b"a", b"bc", b"def"])).view()
+    view = na.array_view(pa.array([b"a", b"bc", b"def"]))
 
     assert view.buffers[0] is None
     np.testing.assert_array_equal(
@@ -265,6 +306,8 @@ def test_buffers_binary():
 
 def test_array_stream():
     array_stream = na.ArrayStream.allocate()
+    assert na.array_stream(array_stream) is array_stream
+
     assert array_stream.is_valid() is False
     with pytest.raises(RuntimeError):
         array_stream.get_schema()


Reply via email to