danepitkin commented on code in PR #464:
URL: https://github.com/apache/arrow-nanoarrow/pull/464#discussion_r1603974377
##########
python/src/nanoarrow/visitor.py:
##########
@@ -124,27 +242,26 @@ def finish(self) -> Any:
return None
-class ListBuilder(ArrayStreamVisitor):
- def __init__(self, schema, *, iterator_cls=PyIterator, array_view=None):
+class SingleColumnBuilder(ArrayStreamVisitor):
+ def __init__(self, schema, handle_nulls=None, *, array_view=None):
super().__init__(schema, array_view=array_view)
-
- # Ensure that self._iterator._array_view is self._array_view
- self._iterator = iterator_cls(schema, array_view=self._array_view)
+ cls, kwargs = _resolve_column_builder_cls(
+ self._schema, handle_nulls=handle_nulls
+ )
+ self._visitor = cls(schema, **kwargs, array_view=self._array_view)
def begin(self, total_elements: Union[int, None] = None):
- self._lst = []
+ self._visitor.begin(total_elements)
- def visit_chunk_view(self, array_view: CArrayView):
- # The constructor here ensured that self._iterator._array_view
- # is populated when self._set_array() is called.
- self._lst.extend(self._iterator)
+ def visit_chunk_view(self, array_view: CArrayView) -> None:
+ self._visitor.visit_chunk_view(array_view)
- def finish(self) -> List:
- return self._lst
+ def finish(self) -> Any:
+ return self._visitor.finish()
class ColumnsBuilder(ArrayStreamVisitor):
Review Comment:
I'll add that I also had a bit of trouble comprehending the naming. I think
it might be worthwhile to rename "column" to something else. Technically, it's
just a contiguous array, but `array` is overloaded. Are these technically all
concatenation helpers?
##########
python/src/nanoarrow/visitor.py:
##########
@@ -15,68 +15,186 @@
# specific language governing permissions and limitations
# under the License.
-from typing import Any, List, Sequence, Tuple, Union
+from typing import Any, Callable, List, Sequence, Tuple, Union
-from nanoarrow._lib import CArrayView
+from nanoarrow._lib import CArrayView, CArrowType, CBuffer, CBufferBuilder
from nanoarrow.c_array_stream import c_array_stream
+from nanoarrow.c_schema import c_schema_view
from nanoarrow.iterator import ArrayViewBaseIterator, PyIterator
from nanoarrow.schema import Type
-def to_pylist(obj, schema=None) -> List:
- """Convert ``obj`` to a ``list()` of Python objects
+class ArrayViewVisitable:
+ """Mixin class providing conversion methods based on visitors
- Computes an identical value to ``list(iterator.iter_py())`` but is several
- times faster.
+ Can be used with classes that implement ``__arrow_c_stream__()``
+ or ``__arrow_c_array__()``.
+ """
+
+ def to_pylist(self) -> List:
+ """Convert to a ``list`` of Python objects
+
+ Computes an identical value to ``list(iter_py())`` but can be much
+ faster.
+
+ Examples
+ --------
+ >>> import nanoarrow as na
+ >>> from nanoarrow import visitor
+ >>> array = na.Array([1, 2, 3], na.int32())
+ >>> array.to_pylist()
+ [1, 2, 3]
+ """
+ return ListBuilder.visit(self)
+
+ def to_column_list(self, handle_nulls=None) -> Tuple[List[str],
List[Sequence]]:
+ """Convert to a ``list`` of contiguous sequences
+
+ Converts a stream of struct arrays into its column-wise representation
+ according to :meth:`to_column`.
+
+ Paramters
+ ---------
+ handle_nulls : callable
+ A function returning a sequence based on a validity bytemap and a
+ contiguous buffer of values. If the array contains no nulls, the
+ validity bytemap will be ``None``. Built-in handlers include
+ :func:`nulls_as_sentinel`, :func:`nulls_forbid`, and
+ :func:`nulls_separate`). The default value is :func:`nulls_forbid`.
+
+ Examples
+ --------
+ >>> import nanoarrow as na
+ >>> import pyarrow as pa
+ >>> batch = pa.record_batch({"col1": [1, 2, 3], "col2": ["a", "b",
"c"]})
+ >>> names, columns = na.Array(batch).to_column_list()
+ >>> names
+ ['col1', 'col2']
+ >>> columns
+ [nanoarrow.c_lib.CBuffer(int64[24 b] 1 2 3), ['a', 'b', 'c']]
+ """
+ return ColumnsBuilder.visit(self, handle_nulls=handle_nulls)
+
+ def to_column(self, handle_nulls=None) -> Sequence:
Review Comment:
Am I correct in thinking this is a concatenation function? e.g. chunked
array -> array
##########
python/src/nanoarrow/visitor.py:
##########
@@ -15,68 +15,186 @@
# specific language governing permissions and limitations
# under the License.
-from typing import Any, List, Sequence, Tuple, Union
+from typing import Any, Callable, List, Sequence, Tuple, Union
-from nanoarrow._lib import CArrayView
+from nanoarrow._lib import CArrayView, CArrowType, CBuffer, CBufferBuilder
from nanoarrow.c_array_stream import c_array_stream
+from nanoarrow.c_schema import c_schema_view
from nanoarrow.iterator import ArrayViewBaseIterator, PyIterator
from nanoarrow.schema import Type
-def to_pylist(obj, schema=None) -> List:
- """Convert ``obj`` to a ``list()` of Python objects
+class ArrayViewVisitable:
+ """Mixin class providing conversion methods based on visitors
- Computes an identical value to ``list(iterator.iter_py())`` but is several
- times faster.
+ Can be used with classes that implement ``__arrow_c_stream__()``
+ or ``__arrow_c_array__()``.
+ """
+
+ def to_pylist(self) -> List:
+ """Convert to a ``list`` of Python objects
+
+ Computes an identical value to ``list(iter_py())`` but can be much
+ faster.
+
+ Examples
+ --------
+ >>> import nanoarrow as na
+ >>> from nanoarrow import visitor
+ >>> array = na.Array([1, 2, 3], na.int32())
+ >>> array.to_pylist()
+ [1, 2, 3]
+ """
+ return ListBuilder.visit(self)
+
+ def to_column_list(self, handle_nulls=None) -> Tuple[List[str],
List[Sequence]]:
Review Comment:
Would `to_table` potentially be more descriptive?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]