paleolimbot commented on code in PR #454:
URL: https://github.com/apache/arrow-nanoarrow/pull/454#discussion_r1598502717


##########
python/src/nanoarrow/visitor.py:
##########
@@ -0,0 +1,179 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any, List, Sequence, Tuple, Union
+
+from nanoarrow._lib import CArrayView
+from nanoarrow.c_array_stream import c_array_stream
+from nanoarrow.iterator import ArrayViewBaseIterator, PyIterator
+from nanoarrow.schema import Type
+
+
+def to_pylist(obj, schema=None) -> List:
+    """Convert ``obj`` to a ``list()`` of Python objects
+
+    Computes an identical value to ``list(iterator.iter_py())`` but is several
+    times faster.
+
+    Parameters
+    ----------
+    obj : array stream-like
+        An array-like or array stream-like object as sanitized by
+        :func:`c_array_stream`.
+    schema : schema-like, optional
+        An optional schema, passed to :func:`c_array_stream`.
+
+    Examples
+    --------
+
+    >>> import nanoarrow as na
+    >>> from nanoarrow import visitor
+    >>> array = na.c_array([1, 2, 3], na.int32())
+    >>> visitor.to_pylist(array)
+    [1, 2, 3]
+    """
+    return ListBuilder.visit(obj, schema)
+
+
+def to_columns(obj, schema=None) -> Tuple[List[str], List[Sequence]]:
+    """Convert ``obj`` to a ``list()`` of sequences
+
+    Converts a stream of struct arrays into its column-wise representation
+    such that each column is either a contiguous buffer or a ``list()``.
+
+    Parameters
+    ----------
+    obj : array stream-like
+        An array-like or array stream-like object as sanitized by
+        :func:`c_array_stream`.
+    schema : schema-like, optional
+        An optional schema, passed to :func:`c_array_stream`.
+
+    Examples
+    --------
+
+    >>> import nanoarrow as na
+    >>> from nanoarrow import visitor
+    >>> import pyarrow as pa
+    >>> array = pa.record_batch([pa.array([1, 2, 3])], names=["col1"])
+    >>> names, columns = visitor.to_columns(array)
+    >>> names
+    ['col1']
+    >>> columns
+    [[1, 2, 3]]
+    """
+    return ColumnsBuilder.visit(obj, schema)
+
+
+class ArrayStreamVisitor(ArrayViewBaseIterator):
+    """Compute a value from one or more arrays in an ArrowArrayStream
+
+    This class supports a (currently internal) pattern for building
+    output from zero or more arrays in a stream.
+
+    """
+
+    @classmethod
+    def visit(cls, obj, schema=None, total_elements=None, **kwargs):
+        """Visit all chunks in ``obj`` as a :func:`c_array_stream`."""
+
+        if total_elements is None and hasattr(obj, "__len__"):
+            total_elements = len(obj)
+
+        with c_array_stream(obj, schema=schema) as stream:
+            visitor = cls(stream._get_cached_schema(), **kwargs)
+            visitor.begin(total_elements)
+
+            visitor_set_array = visitor._set_array
+            visit_chunk_view = visitor.visit_chunk_view
+            array_view = visitor._array_view
+
+            for array in stream:
+                visitor_set_array(array)
+                visit_chunk_view(array_view)
+
+        return visitor.finish()
+
+    def begin(self, total_elements: Union[int, None] = None):
+        """Called after the schema has been resolved but before any
+        chunks have been visited. If the total number of elements
+        (i.e., the sum of all chunk lengths) is known, it is provided here.
+        """
+        pass
+
+    def visit_chunk_view(self, array_view: CArrayView) -> None:
+        """Called exactly once for each chunk seen."""
+        pass
+
+    def finish(self) -> Any:
+        """Called exactly once after all chunks have been visited."""
+        return None
+
+
+class ListBuilder(ArrayStreamVisitor):
+    def __init__(self, schema, *, iterator_cls=PyIterator, _array_view=None):

Review Comment:
   Good call! I'd used `_array_view` because it's sort of internal; however, 
the whole API is currently internal. If/when it's made public there should 
perhaps be a better system for instantiating/efficiently reusing child array 
views that does not stick out like this does.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to