paleolimbot commented on code in PR #464:
URL: https://github.com/apache/arrow-nanoarrow/pull/464#discussion_r1600362024


##########
python/src/nanoarrow/visitor.py:
##########
@@ -74,9 +75,60 @@ def to_columns(obj, schema=None) -> Tuple[List[str], List[Sequence]]:
     >>> names
     ['col1']
     >>> columns
-    [[1, 2, 3]]
+    [nanoarrow.c_lib.CBuffer(int64[24 b] 1 2 3)]
     """
-    return ColumnsBuilder.visit(obj, schema)
+    return ColumnsBuilder.visit(obj, schema, handle_nulls=handle_nulls)
+
+
+def nulls_forbid() -> Callable[[CBuffer, Sequence], Sequence]:
+    def handle(is_valid, data):
+        if len(is_valid) > 0:
+            raise ValueError("Null present with null_handler=nulls_forbid()")
+
+        return data
+
+    return handle
+
+
+def nulls_debug() -> Callable[[CBuffer, Sequence], Tuple[CBuffer, Sequence]]:
+    def handle(is_valid, data):
+        return is_valid, data
+
+    return handle
+
+
+def nulls_as_sentinel(sentinel=None):
+    from numpy import array, result_type
+
+    def handle(is_valid, data):
+        is_valid = array(is_valid, copy=False)
+        data = array(data, copy=False)
+
+        if len(is_valid) > 0:
+            out_type = result_type(data, sentinel)
+            data = array(data, dtype=out_type, copy=True)
+            data[~is_valid] = sentinel
+            return data
+        else:
+            return data
+
+    return handle
+
+
+def nulls_as_masked_array():

Review Comment:
   Good to know! I'll remove that then 🙂 
   
   > I wonder if it would be more useful to actually return each array as a tuple of data and mask
   
   Currently that would be `nulls_debug()`, mostly because I couldn't think of a good name (in a previous version it was `nulls_as_mask_and_data()`). Returning a tuple here sort of breaks the "every column is a sequence" guarantee. There could also be a `MaskedSequence` class that wraps the mask and data but that seems like opening a can of worms.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to