jorisvandenbossche commented on code in PR #464:
URL: https://github.com/apache/arrow-nanoarrow/pull/464#discussion_r1603331402


##########
python/src/nanoarrow/visitor.py:
##########
@@ -15,68 +15,186 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Any, List, Sequence, Tuple, Union
+from typing import Any, Callable, List, Sequence, Tuple, Union
 
-from nanoarrow._lib import CArrayView
+from nanoarrow._lib import CArrayView, CArrowType, CBuffer, CBufferBuilder
 from nanoarrow.c_array_stream import c_array_stream
+from nanoarrow.c_schema import c_schema_view
 from nanoarrow.iterator import ArrayViewBaseIterator, PyIterator
 from nanoarrow.schema import Type
 
 
-def to_pylist(obj, schema=None) -> List:
-    """Convert ``obj`` to a ``list()` of Python objects
+class ArrayViewVisitable:
+    """Mixin class providing conversion methods based on visitors
+
+    Can be used with classes that implement ``__arrow_c_stream__()``
+    or ``__arrow_c_array__()``.
+    """
+
+    def to_pylist(self) -> List:
+        """Convert to a ``list()`` of Python objects

Review Comment:
   ```suggestion
           """Convert to a ``list`` of Python objects
   ```
   
   (the type is `list` itself, without calling it)



##########
python/src/nanoarrow/visitor.py:
##########
@@ -15,68 +15,186 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Any, List, Sequence, Tuple, Union
+from typing import Any, Callable, List, Sequence, Tuple, Union
 
-from nanoarrow._lib import CArrayView
+from nanoarrow._lib import CArrayView, CArrowType, CBuffer, CBufferBuilder
 from nanoarrow.c_array_stream import c_array_stream
+from nanoarrow.c_schema import c_schema_view
 from nanoarrow.iterator import ArrayViewBaseIterator, PyIterator
 from nanoarrow.schema import Type
 
 
-def to_pylist(obj, schema=None) -> List:
-    """Convert ``obj`` to a ``list()` of Python objects
+class ArrayViewVisitable:
+    """Mixin class providing conversion methods based on visitors
+
+    Can be used with classes that implement ``__arrow_c_stream__()``
+    or ``__arrow_c_array__()``.
+    """
+
+    def to_pylist(self) -> List:
+        """Convert to a ``list()`` of Python objects
+
+        Computes an identical value to ``list(iter_py())`` but can be much
+        faster.
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> from nanoarrow import visitor
+        >>> array = na.Array([1, 2, 3], na.int32())
+        >>> array.to_pylist()
+        [1, 2, 3]
+        """
+        return ListBuilder.visit(self)
+
+    def to_column_list(self, handle_nulls=None) -> Tuple[List[str], 
List[Sequence]]:
+        """Convert to a ``list()` of contiguous sequences
+
+        Converts a stream of struct arrays into its column-wise representation
+        according to :meth:`to_column`.
+
+        Paramters
+        ---------
+        handle_nulls : callable
+            A function returning a sequence based on a validity bytemap and a
+            contiguous buffer of values (e.g., the callable returned by
+            :meth:`nulls_as_sentinel`).
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> import pyarrow as pa
+        >>> batch = pa.record_batch({"col1": [1, 2, 3], "col2": ["a", "b", 
"c"]})
+        >>> names, columns = na.Array(batch).to_column_list()
+        >>> names
+        ['col1', 'col2']
+        >>> columns
+        [nanoarrow.c_lib.CBuffer(int64[24 b] 1 2 3), ['a', 'b', 'c']]
+        """
+        return ColumnsBuilder.visit(self, handle_nulls=handle_nulls)
+
+    def to_column(self, handle_nulls=None) -> Sequence:
+        """Convert to a contiguous sequence
+
+        Converts a stream of arrays into a columnar representation
+        such that each column is either a contiguous buffer or a ``list()``.
+        Integer, float, and interval arrays are currently converted to their
+        contiguous buffer representation; other types are returned as a list
+        of Python objects. The sequences returned by :meth:`to_column` are
+        designed to work as input to ``pandas.Series`` and/or 
``numpy.array()``.
+
+        Parameters
+        ---------
+        obj : array stream-like
+            An array-like or array stream-like object as sanitized by
+            :func:`c_array_stream`.
+        schema : schema-like, optional
+            An optional schema, passed to :func:`c_array_stream`.
+        handle_nulls : callable
+            A function returning a sequence based on a validity bytemap and a
+            contiguous buffer of values (e.g., the callable returned by
+            :meth:`nulls_as_sentinel`).

Review Comment:
   Mention that the default is `nulls_forbid()`?



##########
python/src/nanoarrow/visitor.py:
##########
@@ -15,68 +15,186 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Any, List, Sequence, Tuple, Union
+from typing import Any, Callable, List, Sequence, Tuple, Union
 
-from nanoarrow._lib import CArrayView
+from nanoarrow._lib import CArrayView, CArrowType, CBuffer, CBufferBuilder
 from nanoarrow.c_array_stream import c_array_stream
+from nanoarrow.c_schema import c_schema_view
 from nanoarrow.iterator import ArrayViewBaseIterator, PyIterator
 from nanoarrow.schema import Type
 
 
-def to_pylist(obj, schema=None) -> List:
-    """Convert ``obj`` to a ``list()` of Python objects
+class ArrayViewVisitable:
+    """Mixin class providing conversion methods based on visitors
+
+    Can be used with classes that implement ``__arrow_c_stream__()``
+    or ``__arrow_c_array__()``.
+    """
+
+    def to_pylist(self) -> List:
+        """Convert to a ``list()`` of Python objects
+
+        Computes an identical value to ``list(iter_py())`` but can be much
+        faster.
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> from nanoarrow import visitor
+        >>> array = na.Array([1, 2, 3], na.int32())
+        >>> array.to_pylist()
+        [1, 2, 3]
+        """
+        return ListBuilder.visit(self)
+
+    def to_column_list(self, handle_nulls=None) -> Tuple[List[str], 
List[Sequence]]:
+        """Convert to a ``list()` of contiguous sequences

Review Comment:
   ```suggestion
           """Convert to a ``list`` of contiguous sequences
   ```
   (same as above and mismatching number of `` ` ``)



##########
python/src/nanoarrow/visitor.py:
##########
@@ -15,68 +15,186 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Any, List, Sequence, Tuple, Union
+from typing import Any, Callable, List, Sequence, Tuple, Union
 
-from nanoarrow._lib import CArrayView
+from nanoarrow._lib import CArrayView, CArrowType, CBuffer, CBufferBuilder
 from nanoarrow.c_array_stream import c_array_stream
+from nanoarrow.c_schema import c_schema_view
 from nanoarrow.iterator import ArrayViewBaseIterator, PyIterator
 from nanoarrow.schema import Type
 
 
-def to_pylist(obj, schema=None) -> List:
-    """Convert ``obj`` to a ``list()` of Python objects
+class ArrayViewVisitable:
+    """Mixin class providing conversion methods based on visitors
+
+    Can be used with classes that implement ``__arrow_c_stream__()``
+    or ``__arrow_c_array__()``.
+    """
+
+    def to_pylist(self) -> List:
+        """Convert to a ``list()`` of Python objects
+
+        Computes an identical value to ``list(iter_py())`` but can be much
+        faster.
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> from nanoarrow import visitor
+        >>> array = na.Array([1, 2, 3], na.int32())
+        >>> array.to_pylist()
+        [1, 2, 3]
+        """
+        return ListBuilder.visit(self)
+
+    def to_column_list(self, handle_nulls=None) -> Tuple[List[str], 
List[Sequence]]:
+        """Convert to a ``list()` of contiguous sequences
+
+        Converts a stream of struct arrays into its column-wise representation
+        according to :meth:`to_column`.
+
+        Paramters
+        ---------
+        handle_nulls : callable
+            A function returning a sequence based on a validity bytemap and a
+            contiguous buffer of values (e.g., the callable returned by
+            :meth:`nulls_as_sentinel`).
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> import pyarrow as pa
+        >>> batch = pa.record_batch({"col1": [1, 2, 3], "col2": ["a", "b", 
"c"]})
+        >>> names, columns = na.Array(batch).to_column_list()
+        >>> names
+        ['col1', 'col2']
+        >>> columns
+        [nanoarrow.c_lib.CBuffer(int64[24 b] 1 2 3), ['a', 'b', 'c']]
+        """
+        return ColumnsBuilder.visit(self, handle_nulls=handle_nulls)
+
+    def to_column(self, handle_nulls=None) -> Sequence:
+        """Convert to a contiguous sequence
+
+        Converts a stream of arrays into a columnar representation
+        such that each column is either a contiguous buffer or a ``list()``.
+        Integer, float, and interval arrays are currently converted to their
+        contiguous buffer representation; other types are returned as a list
+        of Python objects. The sequences returned by :meth:`to_column` are
+        designed to work as input to ``pandas.Series`` and/or 
``numpy.array()``.
+
+        Parameters
+        ---------
+        obj : array stream-like
+            An array-like or array stream-like object as sanitized by
+            :func:`c_array_stream`.
+        schema : schema-like, optional
+            An optional schema, passed to :func:`c_array_stream`.
+        handle_nulls : callable
+            A function returning a sequence based on a validity bytemap and a
+            contiguous buffer of values (e.g., the callable returned by
+            :meth:`nulls_as_sentinel`).

Review Comment:
   And maybe also list the built-in options



##########
python/src/nanoarrow/visitor.py:
##########
@@ -15,68 +15,186 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Any, List, Sequence, Tuple, Union
+from typing import Any, Callable, List, Sequence, Tuple, Union
 
-from nanoarrow._lib import CArrayView
+from nanoarrow._lib import CArrayView, CArrowType, CBuffer, CBufferBuilder
 from nanoarrow.c_array_stream import c_array_stream
+from nanoarrow.c_schema import c_schema_view
 from nanoarrow.iterator import ArrayViewBaseIterator, PyIterator
 from nanoarrow.schema import Type
 
 
-def to_pylist(obj, schema=None) -> List:
-    """Convert ``obj`` to a ``list()` of Python objects
+class ArrayViewVisitable:
+    """Mixin class providing conversion methods based on visitors
+
+    Can be used with classes that implement ``__arrow_c_stream__()``
+    or ``__arrow_c_array__()``.
+    """
+
+    def to_pylist(self) -> List:
+        """Convert to a ``list()`` of Python objects
+
+        Computes an identical value to ``list(iter_py())`` but can be much
+        faster.
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> from nanoarrow import visitor
+        >>> array = na.Array([1, 2, 3], na.int32())
+        >>> array.to_pylist()
+        [1, 2, 3]
+        """
+        return ListBuilder.visit(self)
+
+    def to_column_list(self, handle_nulls=None) -> Tuple[List[str], 
List[Sequence]]:
+        """Convert to a ``list()` of contiguous sequences
+
+        Converts a stream of struct arrays into its column-wise representation
+        according to :meth:`to_column`.
+
+        Paramters
+        ---------
+        handle_nulls : callable
+            A function returning a sequence based on a validity bytemap and a
+            contiguous buffer of values (e.g., the callable returned by
+            :meth:`nulls_as_sentinel`).
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> import pyarrow as pa
+        >>> batch = pa.record_batch({"col1": [1, 2, 3], "col2": ["a", "b", 
"c"]})
+        >>> names, columns = na.Array(batch).to_column_list()
+        >>> names
+        ['col1', 'col2']
+        >>> columns
+        [nanoarrow.c_lib.CBuffer(int64[24 b] 1 2 3), ['a', 'b', 'c']]
+        """
+        return ColumnsBuilder.visit(self, handle_nulls=handle_nulls)
+
+    def to_column(self, handle_nulls=None) -> Sequence:
+        """Convert to a contiguous sequence
+
+        Converts a stream of arrays into a columnar representation
+        such that each column is either a contiguous buffer or a ``list()``.
+        Integer, float, and interval arrays are currently converted to their
+        contiguous buffer representation; other types are returned as a list
+        of Python objects. The sequences returned by :meth:`to_column` are
+        designed to work as input to ``pandas.Series`` and/or 
``numpy.array()``.
+
+        Parameters
+        ---------
+        obj : array stream-like
+            An array-like or array stream-like object as sanitized by
+            :func:`c_array_stream`.
+        schema : schema-like, optional
+            An optional schema, passed to :func:`c_array_stream`.

Review Comment:
   This is not actually present in the signature?



##########
python/src/nanoarrow/visitor.py:
##########
@@ -15,68 +15,186 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Any, List, Sequence, Tuple, Union
+from typing import Any, Callable, List, Sequence, Tuple, Union
 
-from nanoarrow._lib import CArrayView
+from nanoarrow._lib import CArrayView, CArrowType, CBuffer, CBufferBuilder
 from nanoarrow.c_array_stream import c_array_stream
+from nanoarrow.c_schema import c_schema_view
 from nanoarrow.iterator import ArrayViewBaseIterator, PyIterator
 from nanoarrow.schema import Type
 
 
-def to_pylist(obj, schema=None) -> List:
-    """Convert ``obj`` to a ``list()` of Python objects
+class ArrayViewVisitable:
+    """Mixin class providing conversion methods based on visitors
+
+    Can be used with classes that implement ``__arrow_c_stream__()``
+    or ``__arrow_c_array__()``.
+    """
+
+    def to_pylist(self) -> List:
+        """Convert to a ``list()`` of Python objects
+
+        Computes an identical value to ``list(iter_py())`` but can be much
+        faster.
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> from nanoarrow import visitor
+        >>> array = na.Array([1, 2, 3], na.int32())
+        >>> array.to_pylist()
+        [1, 2, 3]
+        """
+        return ListBuilder.visit(self)
+
+    def to_column_list(self, handle_nulls=None) -> Tuple[List[str], 
List[Sequence]]:
+        """Convert to a ``list()` of contiguous sequences
+
+        Converts a stream of struct arrays into its column-wise representation
+        according to :meth:`to_column`.
+
+        Paramters
+        ---------
+        handle_nulls : callable
+            A function returning a sequence based on a validity bytemap and a
+            contiguous buffer of values (e.g., the callable returned by
+            :meth:`nulls_as_sentinel`).
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> import pyarrow as pa
+        >>> batch = pa.record_batch({"col1": [1, 2, 3], "col2": ["a", "b", 
"c"]})
+        >>> names, columns = na.Array(batch).to_column_list()
+        >>> names
+        ['col1', 'col2']
+        >>> columns
+        [nanoarrow.c_lib.CBuffer(int64[24 b] 1 2 3), ['a', 'b', 'c']]
+        """
+        return ColumnsBuilder.visit(self, handle_nulls=handle_nulls)
+
+    def to_column(self, handle_nulls=None) -> Sequence:
+        """Convert to a contiguous sequence
+
+        Converts a stream of arrays into a columnar representation
+        such that each column is either a contiguous buffer or a ``list()``.
+        Integer, float, and interval arrays are currently converted to their
+        contiguous buffer representation; other types are returned as a list
+        of Python objects. The sequences returned by :meth:`to_column` are
+        designed to work as input to ``pandas.Series`` and/or 
``numpy.array()``.
+
+        Parameters
+        ---------
+        obj : array stream-like
+            An array-like or array stream-like object as sanitized by
+            :func:`c_array_stream`.
+        schema : schema-like, optional
+            An optional schema, passed to :func:`c_array_stream`.
+        handle_nulls : callable
+            A function returning a sequence based on a validity bytemap and a
+            contiguous buffer of values (e.g., the callable returned by
+            :meth:`nulls_as_sentinel`).
+
+        Examples
+        --------
+        >>> import nanoarrow as na
+        >>> na.Array([1, 2, 3], na.int32()).to_column()
+        nanoarrow.c_lib.CBuffer(int32[12 b] 1 2 3)
+        """
+        return SingleColumnBuilder.visit(self, handle_nulls=handle_nulls)
 
-    Computes an identical value to ``list(iterator.iter_py())`` but is several
-    times faster.
 
-    Paramters
-    ---------
-    obj : array stream-like
-        An array-like or array stream-like object as sanitized by
-        :func:`c_array_stream`.
-    schema : schema-like, optional
-        An optional schema, passed to :func:`c_array_stream`.
+def nulls_forbid() -> Callable[[CBuffer, Sequence], Sequence]:
+    """Erroring null handler
+
+    A null handler that errors when it encounters nulls.
 
     Examples
     --------
 
     >>> import nanoarrow as na
-    >>> from nanoarrow import visitor
-    >>> array = na.c_array([1, 2, 3], na.int32())
-    >>> visitor.to_pylist(array)
-    [1, 2, 3]
+    >>> na.Array([1, 2, 3], na.int32()).to_column(na.nulls_forbid())
+    nanoarrow.c_lib.CBuffer(int32[12 b] 1 2 3)
+    >>> na.Array([1, None, 3], na.int32()).to_column(na.nulls_forbid())
+    Traceback (most recent call last):
+    ...
+    ValueError: Null present with null_handler=nulls_forbid()
     """
-    return ListBuilder.visit(obj, schema)
 
+    def handle(is_valid, data):
+        if len(is_valid) > 0:
+            raise ValueError("Null present with null_handler=nulls_forbid()")

Review Comment:
   The presence of a validity buffer does not by itself mean that the array 
includes any nulls, does it? Or did you ensure that beforehand (e.g., by not 
converting the bitmap to a bytemap when all bits of the bitmap are set)?



##########
python/src/nanoarrow/visitor.py:
##########
@@ -15,68 +15,186 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Any, List, Sequence, Tuple, Union
+from typing import Any, Callable, List, Sequence, Tuple, Union
 
-from nanoarrow._lib import CArrayView
+from nanoarrow._lib import CArrayView, CArrowType, CBuffer, CBufferBuilder
 from nanoarrow.c_array_stream import c_array_stream
+from nanoarrow.c_schema import c_schema_view
 from nanoarrow.iterator import ArrayViewBaseIterator, PyIterator
 from nanoarrow.schema import Type
 
 
-def to_pylist(obj, schema=None) -> List:
-    """Convert ``obj`` to a ``list()` of Python objects
+class ArrayViewVisitable:
+    """Mixin class providing conversion methods based on visitors
+
+    Can be used with classes that implement ``__arrow_c_stream__()``
+    or ``__arrow_c_array__()``.
+    """
+
+    def to_pylist(self) -> List:
+        """Convert to a ``list()`` of Python objects
+
+        Computes an identical value to ``list(iter_py())`` but can be much
+        faster.
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> from nanoarrow import visitor
+        >>> array = na.Array([1, 2, 3], na.int32())
+        >>> array.to_pylist()
+        [1, 2, 3]
+        """
+        return ListBuilder.visit(self)
+
+    def to_column_list(self, handle_nulls=None) -> Tuple[List[str], 
List[Sequence]]:
+        """Convert to a ``list()` of contiguous sequences
+
+        Converts a stream of struct arrays into its column-wise representation
+        according to :meth:`to_column`.
+
+        Paramters
+        ---------
+        handle_nulls : callable
+            A function returning a sequence based on a validity bytemap and a
+            contiguous buffer of values (e.g., the callable returned by
+            :meth:`nulls_as_sentinel`).
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> import pyarrow as pa
+        >>> batch = pa.record_batch({"col1": [1, 2, 3], "col2": ["a", "b", 
"c"]})
+        >>> names, columns = na.Array(batch).to_column_list()
+        >>> names
+        ['col1', 'col2']
+        >>> columns
+        [nanoarrow.c_lib.CBuffer(int64[24 b] 1 2 3), ['a', 'b', 'c']]
+        """
+        return ColumnsBuilder.visit(self, handle_nulls=handle_nulls)
+
+    def to_column(self, handle_nulls=None) -> Sequence:
+        """Convert to a contiguous sequence
+
+        Converts a stream of arrays into a columnar representation
+        such that each column is either a contiguous buffer or a ``list()``.

Review Comment:
   ```suggestion
           such that each column is either a contiguous buffer or a ``list``.
   ```



##########
python/src/nanoarrow/visitor.py:
##########
@@ -15,68 +15,186 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Any, List, Sequence, Tuple, Union
+from typing import Any, Callable, List, Sequence, Tuple, Union
 
-from nanoarrow._lib import CArrayView
+from nanoarrow._lib import CArrayView, CArrowType, CBuffer, CBufferBuilder
 from nanoarrow.c_array_stream import c_array_stream
+from nanoarrow.c_schema import c_schema_view
 from nanoarrow.iterator import ArrayViewBaseIterator, PyIterator
 from nanoarrow.schema import Type
 
 
-def to_pylist(obj, schema=None) -> List:
-    """Convert ``obj`` to a ``list()` of Python objects
+class ArrayViewVisitable:
+    """Mixin class providing conversion methods based on visitors
+
+    Can be used with classes that implement ``__arrow_c_stream__()``
+    or ``__arrow_c_array__()``.
+    """
+
+    def to_pylist(self) -> List:
+        """Convert to a ``list()`` of Python objects
+
+        Computes an identical value to ``list(iter_py())`` but can be much
+        faster.
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> from nanoarrow import visitor
+        >>> array = na.Array([1, 2, 3], na.int32())
+        >>> array.to_pylist()
+        [1, 2, 3]
+        """
+        return ListBuilder.visit(self)
+
+    def to_column_list(self, handle_nulls=None) -> Tuple[List[str], 
List[Sequence]]:
+        """Convert to a ``list()` of contiguous sequences
+
+        Converts a stream of struct arrays into its column-wise representation
+        according to :meth:`to_column`.
+
+        Paramters
+        ---------
+        handle_nulls : callable
+            A function returning a sequence based on a validity bytemap and a
+            contiguous buffer of values (e.g., the callable returned by
+            :meth:`nulls_as_sentinel`).
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> import pyarrow as pa
+        >>> batch = pa.record_batch({"col1": [1, 2, 3], "col2": ["a", "b", 
"c"]})
+        >>> names, columns = na.Array(batch).to_column_list()
+        >>> names
+        ['col1', 'col2']
+        >>> columns
+        [nanoarrow.c_lib.CBuffer(int64[24 b] 1 2 3), ['a', 'b', 'c']]
+        """
+        return ColumnsBuilder.visit(self, handle_nulls=handle_nulls)
+
+    def to_column(self, handle_nulls=None) -> Sequence:
+        """Convert to a contiguous sequence
+
+        Converts a stream of arrays into a columnar representation
+        such that each column is either a contiguous buffer or a ``list()``.
+        Integer, float, and interval arrays are currently converted to their
+        contiguous buffer representation; other types are returned as a list
+        of Python objects. The sequences returned by :meth:`to_column` are
+        designed to work as input to ``pandas.Series`` and/or 
``numpy.array()``.
+
+        Parameters
+        ---------
+        obj : array stream-like
+            An array-like or array stream-like object as sanitized by
+            :func:`c_array_stream`.
+        schema : schema-like, optional
+            An optional schema, passed to :func:`c_array_stream`.
+        handle_nulls : callable
+            A function returning a sequence based on a validity bytemap and a
+            contiguous buffer of values (e.g., the callable returned by
+            :meth:`nulls_as_sentinel`).
+
+        Examples
+        --------
+        >>> import nanoarrow as na
+        >>> na.Array([1, 2, 3], na.int32()).to_column()
+        nanoarrow.c_lib.CBuffer(int32[12 b] 1 2 3)
+        """
+        return SingleColumnBuilder.visit(self, handle_nulls=handle_nulls)
 
-    Computes an identical value to ``list(iterator.iter_py())`` but is several
-    times faster.
 
-    Paramters
-    ---------
-    obj : array stream-like
-        An array-like or array stream-like object as sanitized by
-        :func:`c_array_stream`.
-    schema : schema-like, optional
-        An optional schema, passed to :func:`c_array_stream`.
+def nulls_forbid() -> Callable[[CBuffer, Sequence], Sequence]:
+    """Erroring null handler
+
+    A null handler that errors when it encounters nulls.
 
     Examples
     --------
 
     >>> import nanoarrow as na
-    >>> from nanoarrow import visitor
-    >>> array = na.c_array([1, 2, 3], na.int32())
-    >>> visitor.to_pylist(array)
-    [1, 2, 3]
+    >>> na.Array([1, 2, 3], na.int32()).to_column(na.nulls_forbid())
+    nanoarrow.c_lib.CBuffer(int32[12 b] 1 2 3)
+    >>> na.Array([1, None, 3], na.int32()).to_column(na.nulls_forbid())
+    Traceback (most recent call last):
+    ...
+    ValueError: Null present with null_handler=nulls_forbid()
     """
-    return ListBuilder.visit(obj, schema)
 
+    def handle(is_valid, data):
+        if len(is_valid) > 0:
+            raise ValueError("Null present with null_handler=nulls_forbid()")
+
+        return data
+
+    return handle
 
-def to_columns(obj, schema=None) -> Tuple[List[str], List[Sequence]]:
-    """Convert ``obj`` to a ``list()` of sequences
 
-    Converts a stream of struct arrays into its column-wise representation
-    such that each column is either a contiguous buffer or a ``list()``.
+def nulls_as_sentinel(sentinel=None):
+    """Sentinel null handler
 
-    Paramters
-    ---------
-    obj : array stream-like
-        An array-like or array stream-like object as sanitized by
-        :func:`c_array_stream`.
-    schema : schema-like, optional
-        An optional schema, passed to :func:`c_array_stream`.
+    A null handler that assigns a sentinel to null values. This is
+    done using numpy using the expression ``data[~is_valid] = sentinel``.
+    The default sentinel value will result in ``nan`` assigned to null
+    values in numeric and boolean outputs.
+
+    Parameters
+    ----------
+    sentinel : scalar, optional
+        The value with which nulls should be replaced.
 
     Examples
     --------
 
     >>> import nanoarrow as na
-    >>> from nanoarrow import visitor
-    >>> import pyarrow as pa
-    >>> array = pa.record_batch([pa.array([1, 2, 3])], names=["col1"])
-    >>> names, columns = visitor.to_columns(array)
-    >>> names
-    ['col1']
-    >>> columns
-    [[1, 2, 3]]
+    >>> na.Array([1, 2, 3], na.int32()).to_column(na.nulls_as_sentinel())
+    array([1, 2, 3], dtype=int32)
+    >>> na.Array([1, None, 3], na.int32()).to_column(na.nulls_as_sentinel())
+    array([ 1., nan,  3.])
+    >>> na.Array([1, None, 3], 
na.int32()).to_column(na.nulls_as_sentinel(-999))
+    array([   1, -999,    3], dtype=int32)
     """
-    return ColumnsBuilder.visit(obj, schema)
+    import numpy as np
+
+    def handle(is_valid, data):
+        is_valid = np.array(is_valid, copy=False)
+        data = np.array(data, copy=False)
+
+        if len(is_valid) > 0:
+            out_type = np.result_type(data, sentinel)
+            data = np.array(data, dtype=out_type, copy=True)
+            data[~is_valid] = sentinel
+            return data
+        else:
+            return data
+
+    return handle
+
+
+def nulls_separate() -> Callable[[CBuffer, Sequence], Tuple[CBuffer, 
Sequence]]:
+    """Return nulls as a tuple of is_valid, data
+
+    A null handler that returns its components.
+
+    Examples
+    --------
+
+    >>> import nanoarrow as na
+    >>> na.Array([1, 2, 3], na.int32()).to_column(na.nulls_separate())
+    (nanoarrow.c_lib.CBuffer(uint8[0 b] ), nanoarrow.c_lib.CBuffer(int32[12 b] 
1 2 3))

Review Comment:
   Should we return `None` here instead of an empty buffer? Because if you 
convert both outputs to numpy arrays, you can end up with numpy arrays of 
different lengths (of course, checking for `None` and checking for an 
empty / length-0 buffer are about equally easy).



##########
python/src/nanoarrow/visitor.py:
##########
@@ -15,68 +15,186 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Any, List, Sequence, Tuple, Union
+from typing import Any, Callable, List, Sequence, Tuple, Union
 
-from nanoarrow._lib import CArrayView
+from nanoarrow._lib import CArrayView, CArrowType, CBuffer, CBufferBuilder
 from nanoarrow.c_array_stream import c_array_stream
+from nanoarrow.c_schema import c_schema_view
 from nanoarrow.iterator import ArrayViewBaseIterator, PyIterator
 from nanoarrow.schema import Type
 
 
-def to_pylist(obj, schema=None) -> List:
-    """Convert ``obj`` to a ``list()` of Python objects
+class ArrayViewVisitable:
+    """Mixin class providing conversion methods based on visitors
+
+    Can be used with classes that implement ``__arrow_c_stream__()``
+    or ``__arrow_c_array__()``.
+    """
+
+    def to_pylist(self) -> List:
+        """Convert to a ``list()`` of Python objects
+
+        Computes an identical value to ``list(iter_py())`` but can be much
+        faster.
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> from nanoarrow import visitor
+        >>> array = na.Array([1, 2, 3], na.int32())
+        >>> array.to_pylist()
+        [1, 2, 3]
+        """
+        return ListBuilder.visit(self)
+
+    def to_column_list(self, handle_nulls=None) -> Tuple[List[str], 
List[Sequence]]:
+        """Convert to a ``list()` of contiguous sequences
+
+        Converts a stream of struct arrays into its column-wise representation
+        according to :meth:`to_column`.
+
+        Paramters
+        ---------
+        handle_nulls : callable
+            A function returning a sequence based on a validity bytemap and a
+            contiguous buffer of values (e.g., the callable returned by
+            :meth:`nulls_as_sentinel`).
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> import pyarrow as pa
+        >>> batch = pa.record_batch({"col1": [1, 2, 3], "col2": ["a", "b", 
"c"]})
+        >>> names, columns = na.Array(batch).to_column_list()
+        >>> names
+        ['col1', 'col2']
+        >>> columns
+        [nanoarrow.c_lib.CBuffer(int64[24 b] 1 2 3), ['a', 'b', 'c']]
+        """
+        return ColumnsBuilder.visit(self, handle_nulls=handle_nulls)
+
+    def to_column(self, handle_nulls=None) -> Sequence:
+        """Convert to a contiguous sequence
+
+        Converts a stream of arrays into a columnar representation
+        such that each column is either a contiguous buffer or a ``list()``.
+        Integer, float, and interval arrays are currently converted to their

Review Comment:
   What is the reason interval arrays are returned as a buffer? (Just because 
there is no obvious Python object?)
   
   And why are other primitive fixed-width types like timestamp not returned as 
buffers? (because they are not a _c_ primitive type? And have an obvious python 
object to use?) 
   
   I can imagine that for certain purposes, you would actually want the 
integers behind a timestamp column, because that is much cheaper to work with 
than a list of python datetime.datetime objects. 
   
   Should we make this user configurable?



##########
python/src/nanoarrow/visitor.py:
##########
@@ -15,68 +15,186 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Any, List, Sequence, Tuple, Union
+from typing import Any, Callable, List, Sequence, Tuple, Union
 
-from nanoarrow._lib import CArrayView
+from nanoarrow._lib import CArrayView, CArrowType, CBuffer, CBufferBuilder
 from nanoarrow.c_array_stream import c_array_stream
+from nanoarrow.c_schema import c_schema_view
 from nanoarrow.iterator import ArrayViewBaseIterator, PyIterator
 from nanoarrow.schema import Type
 
 
-def to_pylist(obj, schema=None) -> List:
-    """Convert ``obj`` to a ``list()` of Python objects
+class ArrayViewVisitable:
+    """Mixin class providing conversion methods based on visitors
+
+    Can be used with classes that implement ``__arrow_c_stream__()``
+    or ``__arrow_c_array__()``.
+    """
+
+    def to_pylist(self) -> List:
+        """Convert to a ``list()`` of Python objects
+
+        Computes an identical value to ``list(iter_py())`` but can be much
+        faster.
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> from nanoarrow import visitor
+        >>> array = na.Array([1, 2, 3], na.int32())
+        >>> array.to_pylist()
+        [1, 2, 3]
+        """
+        return ListBuilder.visit(self)
+
+    def to_column_list(self, handle_nulls=None) -> Tuple[List[str], 
List[Sequence]]:
+        """Convert to a ``list()` of contiguous sequences
+
+        Converts a stream of struct arrays into its column-wise representation
+        according to :meth:`to_column`.
+
+        Paramters
+        ---------
+        handle_nulls : callable
+            A function returning a sequence based on a validity bytemap and a
+            contiguous buffer of values (e.g., the callable returned by
+            :meth:`nulls_as_sentinel`).
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> import pyarrow as pa
+        >>> batch = pa.record_batch({"col1": [1, 2, 3], "col2": ["a", "b", 
"c"]})
+        >>> names, columns = na.Array(batch).to_column_list()
+        >>> names
+        ['col1', 'col2']
+        >>> columns
+        [nanoarrow.c_lib.CBuffer(int64[24 b] 1 2 3), ['a', 'b', 'c']]
+        """
+        return ColumnsBuilder.visit(self, handle_nulls=handle_nulls)
+
+    def to_column(self, handle_nulls=None) -> Sequence:
+        """Convert to a contiguous sequence
+
+        Converts a stream of arrays into a columnar representation
+        such that each column is either a contiguous buffer or a ``list()``.
+        Integer, float, and interval arrays are currently converted to their
+        contiguous buffer representation; other types are returned as a list
+        of Python objects. The sequences returned by :meth:`to_column` are
+        designed to work as input to ``pandas.Series`` and/or 
``numpy.array()``.
+
+        Parameters
+        ---------
+        obj : array stream-like
+            An array-like or array stream-like object as sanitized by
+            :func:`c_array_stream`.

Review Comment:
   ```suggestion
   ```
   
   For the methods in this mixin class, this is self?



##########
python/src/nanoarrow/visitor.py:
##########
@@ -15,68 +15,186 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Any, List, Sequence, Tuple, Union
+from typing import Any, Callable, List, Sequence, Tuple, Union
 
-from nanoarrow._lib import CArrayView
+from nanoarrow._lib import CArrayView, CArrowType, CBuffer, CBufferBuilder
 from nanoarrow.c_array_stream import c_array_stream
+from nanoarrow.c_schema import c_schema_view
 from nanoarrow.iterator import ArrayViewBaseIterator, PyIterator
 from nanoarrow.schema import Type
 
 
-def to_pylist(obj, schema=None) -> List:
-    """Convert ``obj`` to a ``list()` of Python objects
+class ArrayViewVisitable:
+    """Mixin class providing conversion methods based on visitors
+
+    Can be used with classes that implement ``__arrow_c_stream__()``
+    or ``__arrow_c_array__()``.
+    """
+
+    def to_pylist(self) -> List:
+        """Convert to a ``list()`` of Python objects
+
+        Computes an identical value to ``list(iter_py())`` but can be much
+        faster.
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> from nanoarrow import visitor
+        >>> array = na.Array([1, 2, 3], na.int32())
+        >>> array.to_pylist()
+        [1, 2, 3]
+        """
+        return ListBuilder.visit(self)
+
+    def to_column_list(self, handle_nulls=None) -> Tuple[List[str], 
List[Sequence]]:
+        """Convert to a ``list()` of contiguous sequences
+
+        Converts a stream of struct arrays into its column-wise representation
+        according to :meth:`to_column`.
+
+        Paramters
+        ---------
+        handle_nulls : callable
+            A function returning a sequence based on a validity bytemap and a
+            contiguous buffer of values (e.g., the callable returned by
+            :meth:`nulls_as_sentinel`).
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> import pyarrow as pa
+        >>> batch = pa.record_batch({"col1": [1, 2, 3], "col2": ["a", "b", 
"c"]})
+        >>> names, columns = na.Array(batch).to_column_list()
+        >>> names
+        ['col1', 'col2']
+        >>> columns
+        [nanoarrow.c_lib.CBuffer(int64[24 b] 1 2 3), ['a', 'b', 'c']]
+        """
+        return ColumnsBuilder.visit(self, handle_nulls=handle_nulls)
+
+    def to_column(self, handle_nulls=None) -> Sequence:
+        """Convert to a contiguous sequence
+
+        Converts a stream of arrays into a columnar representation
+        such that each column is either a contiguous buffer or a ``list()``.
+        Integer, float, and interval arrays are currently converted to their
+        contiguous buffer representation; other types are returned as a list
+        of Python objects. The sequences returned by :meth:`to_column` are
+        designed to work as input to ``pandas.Series`` and/or 
``numpy.array()``.
+
+        Parameters
+        ---------
+        obj : array stream-like
+            An array-like or array stream-like object as sanitized by
+            :func:`c_array_stream`.
+        schema : schema-like, optional
+            An optional schema, passed to :func:`c_array_stream`.
+        handle_nulls : callable
+            A function returning a sequence based on a validity bytemap and a
+            contiguous buffer of values (e.g., the callable returned by
+            :meth:`nulls_as_sentinel`).
+
+        Examples
+        --------
+        >>> import nanoarrow as na
+        >>> na.Array([1, 2, 3], na.int32()).to_column()
+        nanoarrow.c_lib.CBuffer(int32[12 b] 1 2 3)
+        """
+        return SingleColumnBuilder.visit(self, handle_nulls=handle_nulls)
 
-    Computes an identical value to ``list(iterator.iter_py())`` but is several
-    times faster.
 
-    Paramters
-    ---------
-    obj : array stream-like
-        An array-like or array stream-like object as sanitized by
-        :func:`c_array_stream`.
-    schema : schema-like, optional
-        An optional schema, passed to :func:`c_array_stream`.
+def nulls_forbid() -> Callable[[CBuffer, Sequence], Sequence]:
+    """Erroring null handler
+
+    A null handler that errors when it encounters nulls.
 
     Examples
     --------
 
     >>> import nanoarrow as na
-    >>> from nanoarrow import visitor
-    >>> array = na.c_array([1, 2, 3], na.int32())
-    >>> visitor.to_pylist(array)
-    [1, 2, 3]
+    >>> na.Array([1, 2, 3], na.int32()).to_column(na.nulls_forbid())
+    nanoarrow.c_lib.CBuffer(int32[12 b] 1 2 3)
+    >>> na.Array([1, None, 3], na.int32()).to_column(na.nulls_forbid())
+    Traceback (most recent call last):
+    ...
+    ValueError: Null present with null_handler=nulls_forbid()
     """
-    return ListBuilder.visit(obj, schema)
 
+    def handle(is_valid, data):
+        if len(is_valid) > 0:
+            raise ValueError("Null present with null_handler=nulls_forbid()")
+
+        return data
+
+    return handle
 
-def to_columns(obj, schema=None) -> Tuple[List[str], List[Sequence]]:
-    """Convert ``obj`` to a ``list()` of sequences
 
-    Converts a stream of struct arrays into its column-wise representation
-    such that each column is either a contiguous buffer or a ``list()``.
+def nulls_as_sentinel(sentinel=None):
+    """Sentinel null handler
 
-    Paramters
-    ---------
-    obj : array stream-like
-        An array-like or array stream-like object as sanitized by
-        :func:`c_array_stream`.
-    schema : schema-like, optional
-        An optional schema, passed to :func:`c_array_stream`.
+    A null handler that assigns a sentinel to null values. This is
+    done using numpy using the expression ``data[~is_valid] = sentinel``.
+    The default sentinel value will result in ``nan`` assigned to null
+    values in numeric and boolean outputs.

Review Comment:
   I am not entirely sure this is the case for boolean outputs (setting `None` 
into a boolean array sets `False`).
   
   
   



##########
python/src/nanoarrow/visitor.py:
##########
@@ -15,68 +15,186 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from typing import Any, List, Sequence, Tuple, Union
+from typing import Any, Callable, List, Sequence, Tuple, Union
 
-from nanoarrow._lib import CArrayView
+from nanoarrow._lib import CArrayView, CArrowType, CBuffer, CBufferBuilder
 from nanoarrow.c_array_stream import c_array_stream
+from nanoarrow.c_schema import c_schema_view
 from nanoarrow.iterator import ArrayViewBaseIterator, PyIterator
 from nanoarrow.schema import Type
 
 
-def to_pylist(obj, schema=None) -> List:
-    """Convert ``obj`` to a ``list()` of Python objects
+class ArrayViewVisitable:
+    """Mixin class providing conversion methods based on visitors
+
+    Can be used with classes that implement ``__arrow_c_stream__()``
+    or ``__arrow_c_array__()``.
+    """
+
+    def to_pylist(self) -> List:
+        """Convert to a ``list()`` of Python objects
+
+        Computes an identical value to ``list(iter_py())`` but can be much
+        faster.
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> from nanoarrow import visitor
+        >>> array = na.Array([1, 2, 3], na.int32())
+        >>> array.to_pylist()
+        [1, 2, 3]
+        """
+        return ListBuilder.visit(self)
+
+    def to_column_list(self, handle_nulls=None) -> Tuple[List[str], 
List[Sequence]]:
+        """Convert to a ``list()` of contiguous sequences
+
+        Converts a stream of struct arrays into its column-wise representation
+        according to :meth:`to_column`.
+
+        Paramters
+        ---------
+        handle_nulls : callable
+            A function returning a sequence based on a validity bytemap and a
+            contiguous buffer of values (e.g., the callable returned by
+            :meth:`nulls_as_sentinel`).
+
+        Examples
+        --------
+
+        >>> import nanoarrow as na
+        >>> import pyarrow as pa
+        >>> batch = pa.record_batch({"col1": [1, 2, 3], "col2": ["a", "b", 
"c"]})
+        >>> names, columns = na.Array(batch).to_column_list()
+        >>> names
+        ['col1', 'col2']
+        >>> columns
+        [nanoarrow.c_lib.CBuffer(int64[24 b] 1 2 3), ['a', 'b', 'c']]
+        """
+        return ColumnsBuilder.visit(self, handle_nulls=handle_nulls)
+
+    def to_column(self, handle_nulls=None) -> Sequence:
+        """Convert to a contiguous sequence
+
+        Converts a stream of arrays into a columnar representation
+        such that each column is either a contiguous buffer or a ``list()``.
+        Integer, float, and interval arrays are currently converted to their
+        contiguous buffer representation; other types are returned as a list
+        of Python objects. The sequences returned by :meth:`to_column` are
+        designed to work as input to ``pandas.Series`` and/or 
``numpy.array()``.
+
+        Parameters
+        ---------
+        obj : array stream-like
+            An array-like or array stream-like object as sanitized by
+            :func:`c_array_stream`.
+        schema : schema-like, optional
+            An optional schema, passed to :func:`c_array_stream`.
+        handle_nulls : callable
+            A function returning a sequence based on a validity bytemap and a
+            contiguous buffer of values (e.g., the callable returned by
+            :meth:`nulls_as_sentinel`).
+
+        Examples
+        --------
+        >>> import nanoarrow as na
+        >>> na.Array([1, 2, 3], na.int32()).to_column()
+        nanoarrow.c_lib.CBuffer(int32[12 b] 1 2 3)
+        """
+        return SingleColumnBuilder.visit(self, handle_nulls=handle_nulls)
 
-    Computes an identical value to ``list(iterator.iter_py())`` but is several
-    times faster.
 
-    Paramters
-    ---------
-    obj : array stream-like
-        An array-like or array stream-like object as sanitized by
-        :func:`c_array_stream`.
-    schema : schema-like, optional
-        An optional schema, passed to :func:`c_array_stream`.
+def nulls_forbid() -> Callable[[CBuffer, Sequence], Sequence]:
+    """Erroring null handler
+
+    A null handler that errors when it encounters nulls.
 
     Examples
     --------
 
     >>> import nanoarrow as na
-    >>> from nanoarrow import visitor
-    >>> array = na.c_array([1, 2, 3], na.int32())
-    >>> visitor.to_pylist(array)
-    [1, 2, 3]
+    >>> na.Array([1, 2, 3], na.int32()).to_column(na.nulls_forbid())
+    nanoarrow.c_lib.CBuffer(int32[12 b] 1 2 3)
+    >>> na.Array([1, None, 3], na.int32()).to_column(na.nulls_forbid())
+    Traceback (most recent call last):
+    ...
+    ValueError: Null present with null_handler=nulls_forbid()
     """
-    return ListBuilder.visit(obj, schema)
 
+    def handle(is_valid, data):
+        if len(is_valid) > 0:
+            raise ValueError("Null present with null_handler=nulls_forbid()")
+
+        return data
+
+    return handle
 
-def to_columns(obj, schema=None) -> Tuple[List[str], List[Sequence]]:
-    """Convert ``obj`` to a ``list()` of sequences
 
-    Converts a stream of struct arrays into its column-wise representation
-    such that each column is either a contiguous buffer or a ``list()``.
+def nulls_as_sentinel(sentinel=None):
+    """Sentinel null handler
 
-    Paramters
-    ---------
-    obj : array stream-like
-        An array-like or array stream-like object as sanitized by
-        :func:`c_array_stream`.
-    schema : schema-like, optional
-        An optional schema, passed to :func:`c_array_stream`.
+    A null handler that assigns a sentinel to null values. This is
+    done using numpy using the expression ``data[~is_valid] = sentinel``.
+    The default sentinel value will result in ``nan`` assigned to null
+    values in numeric and boolean outputs.

Review Comment:
   Ah, but you make sure to convert int/bool to float when there are nulls, so 
that setting NaN indeed works correctly. Maybe clarify that here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to