This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new acbf6786 feat(python): Add `Array.from_chunks()` constructor (#456)
acbf6786 is described below
commit acbf67864e771915c344e449a0f1fddb0e0a1170
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed May 8 17:11:36 2024 -0300
feat(python): Add `Array.from_chunks()` constructor (#456)
This PR adds a public route to construct chunked arrays (and makes the
other constructors safer to account for the fact that they are now
user-facing). I use this quite a lot interactively to test that things
work in the chunked case. For nanoarrow to be useful in an "I can
help you export things" kind of way, it also needs to be able to do this:
string or binary arrays holding more than 2 GB of data are not uncommon,
and a single array with 32-bit offsets cannot represent them, so they
have to be exported as multiple chunks.
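The main safety consideration here is ensuring that all chunks have a
schema of the same type, so I added a function that checks for that
(`CSchema.type_equals()`, wrapped by `assert_type_equal()` for readable
error messages) and made sure the constructors call it when validating.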
```python
import nanoarrow as na
import numpy as np
na.Array.from_chunks([[1, 2, 3], [4, 5, 6]], na.int32())
na.Array.from_chunks((np.random.random(int(1e3)) for _ in range(int(1e3))))
```
---
python/src/nanoarrow/_lib.pyx | 167 ++++++++++++++++++++++++---------
python/src/nanoarrow/array.py | 56 ++++++++++-
python/src/nanoarrow/c_array_stream.py | 2 +-
python/src/nanoarrow/iterator.py | 4 +-
python/tests/test_array.py | 45 ++++++++-
python/tests/test_c_array_stream.py | 28 +++---
python/tests/test_c_schema.py | 64 +++++++++++++
python/tests/test_capsules.py | 4 +-
python/tests/test_device.py | 6 +-
python/tests/test_ipc.py | 6 +-
python/tests/test_nanoarrow.py | 1 +
11 files changed, 308 insertions(+), 75 deletions(-)
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 690afa2b..b99a6505 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -769,6 +769,47 @@ cdef class CSchema:
def __repr__(self):
return _repr_utils.schema_repr(self)
+ def type_equals(self, CSchema other, check_nullability=False):
+ self._assert_valid()
+
+ if self._ptr == other._ptr:
+ return True
+
+ if self.format != other.format:
+ return False
+
+ # Nullability is not strictly part of the "type"; however, performing
+ # this check recursively is verbose to otherwise accomplish and
+ # sometimes this does matter.
+ cdef int64_t flags = self.flags
+ cdef int64_t other_flags = other.flags
+ if not check_nullability:
+ flags &= ~ARROW_FLAG_NULLABLE
+ other_flags &= ~ARROW_FLAG_NULLABLE
+
+ if flags != other_flags:
+ return False
+
+ if self.n_children != other.n_children:
+ return False
+
+ for child, other_child in zip(self.children, other.children):
+ if not child.type_equals(other_child,
check_nullability=check_nullability):
+ return False
+
+ if (self.dictionary is None) != (other.dictionary is None):
+ return False
+
+ if self.dictionary is not None:
+ if not self.dictionary.type_equals(
+ other.dictionary,
+ check_nullability=check_nullability
+ ):
+ return False
+
+ return True
+
+
@property
def format(self):
self._assert_valid()
@@ -874,6 +915,24 @@ cdef class CSchema:
return builder.finish()
+# This is likely a better fit for a dedicated testing module; however, we need
+# it in _lib.pyx to produce nice error messages when ensuring that one or
+# more arrays conform to a given or inferred schema.
+def assert_type_equal(actual, expected):
+ if not isinstance(actual, CSchema):
+ raise TypeError(f"expected is {type(actual).__name__}, not CSchema")
+
+ if not isinstance(expected, CSchema):
+ raise TypeError(f"expected is {type(expected).__name__}, not CSchema")
+
+ if not actual.type_equals(expected):
+ actual_label = actual._to_string(max_chars=80, recursive=True)
+ expected_label = expected._to_string(max_chars=80, recursive=True)
+ raise ValueError(
+ f"Expected schema\n '{expected_label}'"
+ f"\nbut got\n '{actual_label}'"
+ )
+
cdef class CSchemaView:
"""Low-level ArrowSchemaView wrapper
@@ -1359,9 +1418,9 @@ cdef class CArray:
cdef int64_t start = 0 if k.start is None else k.start
cdef int64_t stop = self._ptr.length if k.stop is None else k.stop
if start < 0:
- start = self.length + start
+ start = self._ptr.length + start
if stop < 0:
- stop = self.length + stop
+ stop = self._ptr.length + stop
if start > self._ptr.length or stop > self._ptr.length or stop < start:
raise IndexError(
@@ -1448,11 +1507,14 @@ cdef class CArray:
def device_id(self):
return self._device_id
- @property
- def length(self):
+ def __len__(self):
self._assert_valid()
return self._ptr.length
+ @property
+ def length(self):
+ return len(self)
+
@property
def offset(self):
self._assert_valid()
@@ -1557,9 +1619,12 @@ cdef class CArrayView:
def layout(self):
return CLayout(self, <uintptr_t>&self._ptr.layout)
+ def __len__(self):
+ return self._ptr.length
+
@property
def length(self):
- return self._ptr.length
+ return len(self)
@property
def offset(self):
@@ -2700,20 +2765,33 @@ cdef class CArrayStream:
return CArrayStream(base, <uintptr_t>c_array_stream_out)
@staticmethod
- def from_array_list(arrays, CSchema schema, move=False, validate=True):
+ def from_c_arrays(arrays, CSchema schema, move=False, validate=True):
cdef ArrowArrayStream* c_array_stream_out
base = alloc_c_array_stream(&c_array_stream_out)
- if not move:
- schema = schema.__deepcopy__()
-
- cdef int code = ArrowBasicArrayStreamInit(c_array_stream_out,
schema._ptr, len(arrays))
+ # Don't create more copies than we have to (but make sure
+ # one exists for validation if requested)
+ cdef CSchema out_schema = schema
+ if validate and not move:
+ validate_schema = schema
+ out_schema = schema.__deepcopy__()
+ elif validate:
+ validate_schema = schema.__deepcopy__()
+ out_schema = schema
+ elif not move:
+ out_schema = schema.__deepcopy__()
+
+ cdef int code = ArrowBasicArrayStreamInit(c_array_stream_out,
out_schema._ptr, len(arrays))
Error.raise_error_not_ok("ArrowBasicArrayStreamInit()", code)
cdef ArrowArray tmp
cdef CArray array
for i in range(len(arrays)):
array = arrays[i]
+
+ if validate:
+ assert_type_equal(array.schema, validate_schema)
+
if not move:
c_array_shallow_copy(array._base, array._ptr, &tmp)
ArrowBasicArrayStreamSetArray(c_array_stream_out, i, &tmp)
@@ -2887,7 +2965,7 @@ cdef class CMaterializedArrayStream:
def __iter__(self):
for c_array in self._arrays:
- for item_i in range(c_array.length):
+ for item_i in range(len(c_array)):
yield c_array, item_i
def array(self, int64_t i):
@@ -2904,7 +2982,13 @@ cdef class CMaterializedArrayStream:
def __arrow_c_stream__(self, requested_schema=None):
# When an array stream from iterable is supported, that could be used
here
# to avoid unnessary shallow copies.
- stream = CArrayStream.from_array_list(self._arrays, self._schema,
move=False)
+ stream = CArrayStream.from_c_arrays(
+ self._arrays,
+ self._schema,
+ move=False,
+ validate=False
+ )
+
return stream.__arrow_c_stream__(requested_schema=requested_schema)
def child(self, int64_t i):
@@ -2914,7 +2998,7 @@ cdef class CMaterializedArrayStream:
out._schema = self._schema.child(i)
out._arrays = [chunk.child(i) for chunk in self._arrays]
for child_chunk in out._arrays:
- out._total_length += child_chunk.length
+ out._total_length += len(child_chunk)
code = ArrowBufferAppendInt64(out._array_ends._ptr,
out._total_length)
Error.raise_error_not_ok("ArrowBufferAppendInt64()", code)
@@ -2922,45 +3006,44 @@ cdef class CMaterializedArrayStream:
return out
@staticmethod
- def from_c_array(CArray array):
- array._assert_valid()
-
+ def from_c_arrays(arrays, CSchema schema, bint validate=True):
cdef CMaterializedArrayStream out = CMaterializedArrayStream()
- out._schema = array._schema
- if array._ptr.length == 0:
- out._finalize()
- return out
+ for array in arrays:
+ if not isinstance(array, CArray):
+ raise TypeError(f"Expected CArray but got
{type(array).__name__}")
- out._arrays.append(array)
- out._total_length += array._ptr.length
- cdef int code = ArrowBufferAppendInt64(out._array_ends._ptr,
out._total_length)
- Error.raise_error_not_ok("ArrowBufferAppendInt64()", code)
+ if len(array) == 0:
+ continue
+
+ if validate:
+ assert_type_equal(array.schema, schema)
+
+ out._total_length += len(array)
+ code = ArrowBufferAppendInt64(out._array_ends._ptr,
out._total_length)
+ Error.raise_error_not_ok("ArrowBufferAppendInt64()", code)
+ out._arrays.append(array)
+ out._schema = schema
out._finalize()
return out
@staticmethod
- def from_c_array_stream(CArrayStream stream):
- stream._assert_valid()
- cdef CMaterializedArrayStream out = CMaterializedArrayStream()
- cdef int code
- cdef CArray array
+ def from_c_array(CArray array):
+ return CMaterializedArrayStream.from_c_arrays(
+ [array],
+ array.schema,
+ validate=False
+ )
+ @staticmethod
+ def from_c_array_stream(CArrayStream stream):
with stream:
- for array in stream:
- if array._ptr.length == 0:
- continue
-
- out._total_length += array._ptr.length
- code = ArrowBufferAppendInt64(out._array_ends._ptr,
out._total_length)
- Error.raise_error_not_ok("ArrowBufferAppendInt64()", code)
- out._arrays.append(array)
-
- out._schema = stream._get_cached_schema()
-
- out._finalize()
- return out
+ return CMaterializedArrayStream.from_c_arrays(
+ stream,
+ stream._get_cached_schema(),
+ validate=False
+ )
cdef class CDeviceArray:
diff --git a/python/src/nanoarrow/array.py b/python/src/nanoarrow/array.py
index 35d58c40..eabe8f2a 100644
--- a/python/src/nanoarrow/array.py
+++ b/python/src/nanoarrow/array.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
+import itertools
from functools import cached_property
from typing import Iterable, Tuple
@@ -28,6 +29,7 @@ from nanoarrow._lib import (
)
from nanoarrow.c_array import c_array, c_array_view
from nanoarrow.c_array_stream import c_array_stream
+from nanoarrow.c_schema import c_schema
from nanoarrow.iterator import iter_array_views, iter_py, iter_tuples
from nanoarrow.schema import Schema
@@ -161,6 +163,56 @@ class Array:
with c_array_stream(obj, schema=schema) as stream:
self._data = CMaterializedArrayStream.from_c_array_stream(stream)
+ @staticmethod
+ def from_chunks(obj: Iterable, schema=None, validate: bool = True):
+ """Create an Array with explicit chunks
+
+ Creates an :class:`Array` with explicit chunking from an iterable of
+ objects that can be converted to a :func:`c_array`.
+
+ Parameters
+ ----------
+ obj : iterable of array-like
+ An iterable of objects that can be passed to :func:`c_array`.
+ schema : schema-like, optional
+ An optional schema. If present, will be passed to :func:`c_array`
+ for each item in obj; if not present it will be inferred from the
first
+ chunk.
+ validate : bool
+ Use ``False`` to opt out of validation steps performed when
constructing
+ this array.
+
+ Examples
+ --------
+ >>> import nanoarrow as na
+ >>> na.Array.from_chunks([[1, 2, 3], [4, 5, 6]], na.int32())
+ nanoarrow.Array<int32>[6]
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ """
+ obj = iter(obj)
+
+ if schema is None:
+ first = next(obj, None)
+ if first is None:
+ raise ValueError("Can't create empty Array from chunks without
schema")
+
+ first = c_array(first)
+ out_schema = first.schema
+ obj = itertools.chain([first], obj)
+ else:
+ out_schema = c_schema(schema)
+
+ data = CMaterializedArrayStream.from_c_arrays(
+ (c_array(item, schema) for item in obj), out_schema,
validate=validate
+ )
+
+ return Array(data)
+
def _assert_one_chunk(self, op):
if self._data.n_arrays != 1:
raise ValueError(f"Can't {op} with non-contiguous Array")
@@ -278,9 +330,9 @@ class Array:
>>> import nanoarrow as na
>>> array = na.Array([1, 2, 3], na.int32())
>>> for view in array.iter_chunk_views():
- ... offset, length = view.offset, view.length
+ ... offset, length = view.offset, len(view)
... validity, data = view.buffers
- ... print(view.offset, view.length)
+ ... print(offset, length)
... print(validity)
... print(data)
0 3
diff --git a/python/src/nanoarrow/c_array_stream.py
b/python/src/nanoarrow/c_array_stream.py
index 816f2696..77eeaaf6 100644
--- a/python/src/nanoarrow/c_array_stream.py
+++ b/python/src/nanoarrow/c_array_stream.py
@@ -88,7 +88,7 @@ def c_array_stream(obj=None, schema=None) -> CArrayStream:
try:
array = c_array(obj, schema=schema)
- return CArrayStream.from_array_list([array], array.schema,
validate=False)
+ return CArrayStream.from_c_arrays([array], array.schema,
validate=False)
except Exception as e:
raise TypeError(
f"An error occurred whilst converting {type(obj).__name__} "
diff --git a/python/src/nanoarrow/iterator.py b/python/src/nanoarrow/iterator.py
index aef56263..76f2a775 100644
--- a/python/src/nanoarrow/iterator.py
+++ b/python/src/nanoarrow/iterator.py
@@ -140,7 +140,7 @@ class ArrayViewBaseIterator:
iterator = cls(stream._get_cached_schema())
for array in stream:
iterator._set_array(array)
- yield from iterator._iter_chunk(0, array.length)
+ yield from iterator._iter_chunk(0, len(array))
def __init__(self, schema, *, _array_view=None):
self._schema = c_schema(schema)
@@ -222,7 +222,7 @@ class PyIterator(ArrayViewBaseIterator):
def _dictionary_iter(self, offset, length):
dictionary = list(
- self._dictionary._iter_chunk(0,
self._dictionary._array_view.length)
+ self._dictionary._iter_chunk(0, len(self._dictionary._array_view))
)
for dict_index in self._primitive_iter(offset, length):
yield None if dict_index is None else dictionary[dict_index]
diff --git a/python/tests/test_array.py b/python/tests/test_array.py
index 5ef75a69..a6a79eec 100644
--- a/python/tests/test_array.py
+++ b/python/tests/test_array.py
@@ -43,6 +43,41 @@ def test_array_alias_constructor():
assert array.schema.type == na.Type.INT32
+def test_array_from_chunks():
+ # Check with explicit schema
+ array = na.Array.from_chunks([[1, 2, 3], [4, 5, 6]], na.int32())
+ assert array.schema.type == na.Type.INT32
+ assert array.n_chunks == 2
+ assert list(array.iter_py()) == [1, 2, 3, 4, 5, 6]
+
+ # Check with schema inferred from first chunk
+ array = na.Array.from_chunks(array.iter_chunks())
+ assert array.schema.type == na.Type.INT32
+ assert array.n_chunks == 2
+ assert list(array.iter_py()) == [1, 2, 3, 4, 5, 6]
+
+ # Check empty
+ array = na.Array.from_chunks([], na.int32())
+ assert array.schema.type == na.Type.INT32
+ assert len(array) == 0
+ assert array.n_chunks == 0
+
+ msg = "Can't create empty Array from chunks without schema"
+ with pytest.raises(ValueError, match=msg):
+ na.Array.from_chunks([])
+
+
+def test_array_from_chunks_validate():
+ chunks = [na.c_array([1, 2, 3], na.uint32()), na.c_array([1, 2, 3],
na.int32())]
+ # Check that we get validation by default
+ with pytest.raises(ValueError, match="Expected schema"):
+ na.Array.from_chunks(chunks)
+
+ # ...but that one can opt out
+ array = na.Array.from_chunks(chunks, validate=False)
+ assert list(array.iter_py()) == [1, 2, 3, 1, 2, 3]
+
+
def test_array_empty():
array = na.Array([], na.int32())
assert array.schema.type == na.Type.INT32
@@ -71,7 +106,7 @@ def test_array_empty():
assert len(arrays) == 0
c_array = na.c_array(array)
- assert c_array.length == 0
+ assert len(c_array) == 0
assert c_array.schema.format == "i"
@@ -118,14 +153,14 @@ def test_array_contiguous():
assert len(arrays) == 1
c_array = na.c_array(array)
- assert c_array.length == 3
+ assert len(c_array) == 3
assert c_array.schema.format == "i"
def test_array_chunked():
src = [na.c_array([1, 2, 3], na.int32()), na.c_array([4, 5, 6],
na.int32())]
- array = na.Array(CArrayStream.from_array_list(src,
na.c_schema(na.int32())))
+ array = na.Array(CArrayStream.from_c_arrays(src, na.c_schema(na.int32())))
assert array.schema.type == na.Type.INT32
assert len(array) == 6
@@ -176,7 +211,7 @@ def test_array_children():
children=[na.c_array([123456], na.int32())] * 100,
)
src = [c_array, c_array]
- array = na.Array(CArrayStream.from_array_list(src, c_array.schema))
+ array = na.Array(CArrayStream.from_c_arrays(src, c_array.schema))
assert array.n_children == 100
assert array.child(0).schema.type == na.Type.INT32
@@ -198,7 +233,7 @@ def test_scalar_to_array():
assert scalar.device is array.device
as_array = na.c_array(scalar)
assert as_array.offset == 1
- assert as_array.length == 1
+ assert len(as_array) == 1
assert as_array.buffers == na.c_array(array).buffers
with pytest.raises(NotImplementedError):
diff --git a/python/tests/test_c_array_stream.py
b/python/tests/test_c_array_stream.py
index 0fe38f4a..a788302c 100644
--- a/python/tests/test_c_array_stream.py
+++ b/python/tests/test_c_array_stream.py
@@ -16,7 +16,6 @@
# under the License.
import pytest
-from nanoarrow._lib import NanoarrowException
from nanoarrow.c_array_stream import CArrayStream
import nanoarrow as na
@@ -24,12 +23,12 @@ import nanoarrow as na
def test_c_array_stream_from_c_array_stream():
# Wrapping an existing stream is a no-op
- array_stream = CArrayStream.from_array_list([], na.c_schema(na.int32()))
+ array_stream = CArrayStream.from_c_arrays([], na.c_schema(na.int32()))
stream_from_stream = na.c_array_stream(array_stream)
assert stream_from_stream is array_stream
# With requested_schema should go through capsule
- array_stream = CArrayStream.from_array_list([], na.c_schema(na.int32()))
+ array_stream = CArrayStream.from_c_arrays([], na.c_schema(na.int32()))
with pytest.raises(NotImplementedError):
na.c_array_stream(array_stream, na.int64())
@@ -43,7 +42,7 @@ def test_c_array_stream_from_capsule_protocol():
def __arrow_c_stream__(self, *args, **kwargs):
return self.obj.__arrow_c_stream__(*args, **kwargs)
- array_stream = CArrayStream.from_array_list([], na.c_schema(na.int32()))
+ array_stream = CArrayStream.from_c_arrays([], na.c_schema(na.int32()))
array_stream_wrapper = CArrayStreamWrapper(array_stream)
from_protocol = na.c_array_stream(array_stream_wrapper)
assert array_stream.is_valid() is False
@@ -70,14 +69,14 @@ def test_c_array_stream_from_old_pyarrow():
def test_c_array_stream_from_bare_capsule():
- array_stream = CArrayStream.from_array_list([], na.c_schema(na.int32()))
+ array_stream = CArrayStream.from_c_arrays([], na.c_schema(na.int32()))
# Check from bare capsule without supplying a schema
capsule = array_stream.__arrow_c_stream__()
from_capsule = na.c_array_stream(capsule)
assert from_capsule.get_schema().format == "i"
- array_stream = CArrayStream.from_array_list([], na.c_schema(na.int32()))
+ array_stream = CArrayStream.from_c_arrays([], na.c_schema(na.int32()))
capsule = array_stream.__arrow_c_stream__()
with pytest.raises(TypeError, match="Can't import c_array_stream"):
@@ -109,30 +108,30 @@ def test_c_array_stream_error():
def test_array_stream_from_arrays_schema():
schema_in = na.c_schema(na.int32())
- stream = CArrayStream.from_array_list([], schema_in)
+ stream = CArrayStream.from_c_arrays([], schema_in)
assert schema_in.is_valid()
assert list(stream) == []
assert stream.get_schema().format == "i"
# Check move of schema
- CArrayStream.from_array_list([], schema_in, move=True)
+ CArrayStream.from_c_arrays([], schema_in, move=True)
assert schema_in.is_valid() is False
assert stream.get_schema().format == "i"
def test_array_stream_from_arrays():
schema_in = na.c_schema(na.int32())
- array_in = na.c_array([1, 2, 3], schema_in)
+ array_in = na.c_array([1, 2, 3], na.int32())
array_in_buffers = array_in.buffers
- stream = CArrayStream.from_array_list([array_in], schema_in)
+ stream = CArrayStream.from_c_arrays([array_in], schema_in)
assert array_in.is_valid()
arrays = list(stream)
assert len(arrays) == 1
assert arrays[0].buffers == array_in_buffers
# Check move of array
- stream = CArrayStream.from_array_list([array_in], schema_in, move=True)
+ stream = CArrayStream.from_c_arrays([array_in], schema_in, move=True)
assert array_in.is_valid() is False
arrays = list(stream)
assert len(arrays) == 1
@@ -144,12 +143,11 @@ def test_array_stream_from_arrays_validate():
array_in = na.c_array([1, 2, 3], na.int32())
# Check that we can skip validation and proceed without error
- stream = CArrayStream.from_array_list([array_in], schema_in,
validate=False)
+ stream = CArrayStream.from_c_arrays([array_in], schema_in, validate=False)
arrays = list(stream)
assert len(arrays) == 1
assert arrays[0].n_buffers == 2
# ...but that validation does happen by default
- msg = "Expected array with 0 buffer"
- with pytest.raises(NanoarrowException, match=msg):
- CArrayStream.from_array_list([array_in], schema_in)
+ with pytest.raises(ValueError, match="Expected schema"):
+ CArrayStream.from_c_arrays([array_in], schema_in)
diff --git a/python/tests/test_c_schema.py b/python/tests/test_c_schema.py
index 74c69bd7..f70f49ab 100644
--- a/python/tests/test_c_schema.py
+++ b/python/tests/test_c_schema.py
@@ -127,6 +127,70 @@ def test_c_schema_metadata():
assert view.extension_metadata == b"some_metadata"
+def test_c_schema_equals():
+ int32 = na.c_schema(na.int32())
+ struct = na.c_schema(na.struct({"col1": na.int32()}))
+ dictionary = na.c_schema(na.dictionary(na.int32(), na.string()))
+ ordered_dictionary = na.c_schema(
+ na.dictionary(na.int32(), na.string(), dictionary_ordered=True)
+ )
+
+ # Check schemas pointing to the same ArrowSchema
+ assert int32.type_equals(int32)
+
+ # Check equality with deep copies
+ assert int32.type_equals(int32.__deepcopy__())
+ assert struct.type_equals(struct.__deepcopy__())
+ assert dictionary.type_equals(dictionary.__deepcopy__())
+
+ # Check inequality because of format
+ assert int32.type_equals(struct) is False
+
+ # Check inequality because of nullability
+ assert int32.type_equals(int32.modify(flags=0), check_nullability=True) is
False
+ # ...but not by default
+ assert int32.type_equals(int32.modify(flags=0)) is True
+
+ # Check inequality of type information encoded in flags
+ assert dictionary.type_equals(ordered_dictionary) is False
+
+ # Check inequality because of number of children
+ assert struct.type_equals(struct.modify(children=[])) is False
+
+ # Check inequality because of a difference in the children
+ assert struct.type_equals(struct.modify(children=[dictionary])) is False
+
+ # Check inequality because of dictionary presence
+ assert int32.type_equals(dictionary) is False
+ assert dictionary.type_equals(int32) is False
+
+ # Check inequality because of dictionary index type
+ assert (
+ dictionary.type_equals(na.c_schema(na.dictionary(na.int64(),
na.string())))
+ is False
+ )
+
+ # Check inequality because of dictionary value type
+ assert dictionary.type_equals(dictionary.modify(dictionary=struct)) is
False
+
+
+def test_c_schema_assert_type_equal():
+ from nanoarrow._lib import assert_type_equal
+
+ int32 = na.c_schema(na.int32())
+ string = na.c_schema(na.string())
+
+ with pytest.raises(TypeError):
+ assert_type_equal(None, int32)
+
+ with pytest.raises(TypeError):
+ assert_type_equal(int32, None)
+
+ msg = "Expected schema\n 'string'\nbut got\n 'int32'"
+ with pytest.raises(ValueError, match=msg):
+ assert_type_equal(int32, string)
+
+
def test_c_schema_modify():
schema = na.c_schema(na.null())
diff --git a/python/tests/test_capsules.py b/python/tests/test_capsules.py
index 2cf7fbf4..aa5b1743 100644
--- a/python/tests/test_capsules.py
+++ b/python/tests/test_capsules.py
@@ -74,7 +74,7 @@ def test_array():
array = na.c_array(arr_obj)
# some basic validation
assert array.is_valid()
- assert array.length == 3
+ assert len(array) == 3
assert array.schema._to_string(recursive=True) == "int32"
# roundtrip
@@ -98,7 +98,7 @@ def test_array_stream():
# some basic validation
assert array_stream.is_valid()
array = array_stream.get_next()
- assert array.length == 3
+ assert len(array) == 3
assert (
array_stream.get_schema()._to_string(recursive=True)
== "struct<some_column: int32>"
diff --git a/python/tests/test_device.py b/python/tests/test_device.py
index 1158337a..09d897a5 100644
--- a/python/tests/test_device.py
+++ b/python/tests/test_device.py
@@ -43,12 +43,12 @@ def test_c_device_array():
assert darray.schema.format == "i"
- assert darray.array.length == 3
+ assert len(darray.array) == 3
assert darray.array.device_type == device.cpu().device_type
assert darray.array.device_id == device.cpu().device_id
darray_view = darray.view()
- assert darray_view.length == 3
+ assert len(darray_view) == 3
assert list(darray_view.buffer(1)) == [1, 2, 3]
# A CDeviceArray should be returned as is
@@ -75,7 +75,7 @@ def test_c_device_array_protocol():
darray2 = device.c_device_array(wrapper)
assert darray2.schema.format == "i"
- assert darray2.array.length == 3
+ assert len(darray2.array) == 3
assert darray2.array.buffers == darray.array.buffers
with pytest.raises(NotImplementedError):
diff --git a/python/tests/test_ipc.py b/python/tests/test_ipc.py
index 6d281020..b9d15321 100644
--- a/python/tests/test_ipc.py
+++ b/python/tests/test_ipc.py
@@ -62,7 +62,7 @@ def test_ipc_stream_from_readable():
with na.c_array_stream(input) as stream:
batches = list(stream)
assert len(batches) == 1
- assert batches[0].length == 3
+ assert len(batches[0]) == 3
def test_ipc_stream_from_path():
@@ -76,7 +76,7 @@ def test_ipc_stream_from_path():
with na.c_array_stream(input) as stream:
batches = list(stream)
assert len(batches) == 1
- assert batches[0].length == 3
+ assert len(batches[0]) == 3
def test_ipc_stream_from_url():
@@ -90,7 +90,7 @@ def test_ipc_stream_from_url():
with na.c_array_stream(input) as stream:
batches = list(stream)
assert len(batches) == 1
- assert batches[0].length == 3
+ assert len(batches[0]) == 3
def test_ipc_stream_python_exception_on_read():
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index e138ecba..eccd2378 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -89,6 +89,7 @@ def test_c_array():
array = na.c_array(pa.array([1, 2, 3], pa.int32()))
assert array.is_valid() is True
assert array.length == 3
+ assert len(array) == 3
assert array.offset == 0
assert array.null_count == 0
assert array.n_buffers == 2