danepitkin commented on code in PR #456:
URL: https://github.com/apache/arrow-nanoarrow/pull/456#discussion_r1593149052


##########
python/src/nanoarrow/_lib.pyx:
##########
@@ -2921,6 +2972,32 @@ cdef class CMaterializedArrayStream:
         out._finalize()
         return out
 
+    @staticmethod
+    def from_c_arrays(arrays, CSchema schema, bint validate=True):
+        cdef CMaterializedArrayStream out = CMaterializedArrayStream()
+
+        cdef CArray array
+        for item in arrays:
+            array = item

Review Comment:
   nit: 
   ```suggestion
           for array in arrays:
   ```



##########
python/src/nanoarrow/_lib.pyx:
##########
@@ -2921,6 +2972,32 @@ cdef class CMaterializedArrayStream:
         out._finalize()
         return out
 
+    @staticmethod
+    def from_c_arrays(arrays, CSchema schema, bint validate=True):
+        cdef CMaterializedArrayStream out = CMaterializedArrayStream()
+
+        cdef CArray array
+        for item in arrays:
+            array = item
+
+            if array._ptr.length == 0:

Review Comment:
   optional: It could be nice to encapsulate this behind an `array.__len__()` 
function so you could say `len(array) == 0`. You could reuse this function 
below, too.



##########
python/src/nanoarrow/_lib.pyx:
##########
@@ -2700,20 +2729,36 @@ cdef class CArrayStream:
         return CArrayStream(base, <uintptr_t>c_array_stream_out)
 
     @staticmethod
-    def from_array_list(arrays, CSchema schema, move=False, validate=True):
+    def from_c_arrays(arrays, CSchema schema, move=False, validate=True):
         cdef ArrowArrayStream* c_array_stream_out
         base = alloc_c_array_stream(&c_array_stream_out)
 
-        if not move:
-            schema = schema.__deepcopy__()
-
-        cdef int code = ArrowBasicArrayStreamInit(c_array_stream_out, 
schema._ptr, len(arrays))
+        # Don't create more copies than we have to (but make sure
+        # one exists for validation if requested)
+        cdef CSchema out_schema = schema
+        if validate and not move:
+            validate_schema = schema
+            out_schema = schema.__deepcopy__()
+        elif validate:
+            validate_schema = schema.__deepcopy__()
+            out_schema = schema
+        elif not move:
+            out_schema = schema.__deepcopy__()
+
+        cdef int code = ArrowBasicArrayStreamInit(c_array_stream_out, 
out_schema._ptr, len(arrays))
         Error.raise_error_not_ok("ArrowBasicArrayStreamInit()", code)
 
         cdef ArrowArray tmp
         cdef CArray array
         for i in range(len(arrays)):
             array = arrays[i]
+
+            if validate and not validate_schema.type_equals(array.schema):
+                raise ValueError(
+                    f"Expected schema {validate_schema._to_string()} "
+                    f"but got {array.schema._to_string()}"
+                )

Review Comment:
   It might be nice to put this in a helper function since it's duplicated 
below. 



##########
python/tests/test_c_array_stream.py:
##########
@@ -144,12 +143,12 @@ def test_array_stream_from_arrays_validate():
     array_in = na.c_array([1, 2, 3], na.int32())
 
     # Check that we can skip validation and proceed without error
-    stream = CArrayStream.from_array_list([array_in], schema_in, 
validate=False)
+    stream = CArrayStream.from_c_arrays([array_in], schema_in, validate=False)
     arrays = list(stream)
     assert len(arrays) == 1
     assert arrays[0].n_buffers == 2
 
     # ...but that validation does happen by default
-    msg = "Expected array with 0 buffer"
-    with pytest.raises(NanoarrowException, match=msg):
-        CArrayStream.from_array_list([array_in], schema_in)
+    msg = "Expected schema na but got int32"

Review Comment:
   Could we make this error message more descriptive? `na` is overloaded with 
```import nanoarrow as na```. Perhaps `Expected schema: None, but got: int32`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to