pitrou commented on code in PR #13894:
URL: https://github.com/apache/arrow/pull/13894#discussion_r955108267


##########
python/pyarrow/array.pxi:
##########
@@ -2004,7 +2008,7 @@ cdef class LargeListArray(BaseListArray):
     """
 
     @staticmethod
-    def from_arrays(offsets, values, DataType type=None, MemoryPool pool=None):
+    def from_arrays(offsets, values, DataType type=None, MemoryPool pool=None, 
mask=None):
         """
         Construct LargeListArray from arrays of int64 offsets and values.

Review Comment:
   Here as well, the docstring should be updated for the new argument.



##########
cpp/src/arrow/array/array_list_test.cc:
##########
@@ -197,6 +197,37 @@ class TestListArray : public ::testing::Test {
     EXPECT_FALSE(left->Slice(offset)->Equals(right->Slice(offset)));
   }
 
+  void TestFromArraysWithNullBitMap() {
+    std::shared_ptr<Array> offsets_w_nulls, offsets_wo_nulls, values;
+
+    std::vector<offset_type> offsets = {0, 1, 1, 3, 4};
+
+    std::vector<bool> offsets_w_nulls_flags = {true, false, true, true, true};
+    std::vector<bool> offsets_wo_nulls_flags = {true, true, true, true, true};
+
+    std::vector<int32_t> null_bitmap_values = {1, 0, 1, 1};
+    std::shared_ptr<Buffer> null_bitmap = Buffer::Wrap(null_bitmap_values);
+
+    ArrayFromVector<OffsetType, offset_type>(offsets_w_nulls_flags, offsets,
+                                             &offsets_w_nulls);
+    ArrayFromVector<OffsetType, offset_type>(offsets_wo_nulls_flags, offsets,
+                                             &offsets_wo_nulls);
+
+    auto type = std::make_shared<T>(int32());
+    auto expected = std::dynamic_pointer_cast<ArrayType>(
+        ArrayFromJSON(type, "[[0], null, [0, null], [0]]"));
+
+    values = expected->values();
+
+    ASSERT_OK(ArrayType::FromArrays(*offsets_w_nulls, *values, pool_));
+    ASSERT_OK(ArrayType::FromArrays(*offsets_wo_nulls, *values, pool_));

Review Comment:
   Ok, but can we also test the array that was just created? See 
`TestFromArrays` below for use of `ValidateFull` and `AssertArraysEqual`.
   



##########
python/pyarrow/tests/test_array.py:
##########
@@ -919,6 +919,53 @@ def test_list_from_arrays(list_array_type, 
list_type_factory):
         list_array_type.from_arrays(offsets, values, type=typ)
 
 
[email protected](('list_array_type', 'list_type_factory'), (
+    (pa.ListArray, pa.list_),
+    (pa.LargeListArray, pa.large_list)
+))
[email protected]("arr", (
+    [None, [0]],
+    [None, [0, None], [0]],
+    [[0], [1]],
+))
+def test_list_array_types_from_arrays(
+    list_array_type, list_type_factory, arr
+):
+    arr = pa.array(arr, list_type_factory(pa.int8()))
+    reconstructed_arr = list_array_type.from_arrays(
+        arr.offsets, arr.values, mask=arr.is_null())
+    assert arr == reconstructed_arr

Review Comment:
   I'm curious, does it work if the array is sliced? e.g.:
   ```suggestion
       assert arr == reconstructed_arr
       arr = arr[1:]
       reconstructed_arr = list_array_type.from_arrays(
           arr.offsets, arr.values, mask=arr.is_null())
       assert arr == reconstructed_arr
   ```



##########
cpp/src/arrow/array/array_list_test.cc:
##########
@@ -197,6 +197,37 @@ class TestListArray : public ::testing::Test {
     EXPECT_FALSE(left->Slice(offset)->Equals(right->Slice(offset)));
   }
 
+  void TestFromArraysWithNullBitMap() {
+    std::shared_ptr<Array> offsets_w_nulls, offsets_wo_nulls, values;
+
+    std::vector<offset_type> offsets = {0, 1, 1, 3, 4};
+
+    std::vector<bool> offsets_w_nulls_flags = {true, false, true, true, true};
+    std::vector<bool> offsets_wo_nulls_flags = {true, true, true, true, true};
+
+    std::vector<int32_t> null_bitmap_values = {1, 0, 1, 1};

Review Comment:
   I guess this is ok as an arbitrary null bitmap buffer, but for a 4-entries 
array this will be all ones, right?



##########
cpp/src/arrow/array/array_list_test.cc:
##########
@@ -197,6 +197,37 @@ class TestListArray : public ::testing::Test {
     EXPECT_FALSE(left->Slice(offset)->Equals(right->Slice(offset)));
   }
 
+  void TestFromArraysWithNullBitMap() {
+    std::shared_ptr<Array> offsets_w_nulls, offsets_wo_nulls, values;
+
+    std::vector<offset_type> offsets = {0, 1, 1, 3, 4};
+
+    std::vector<bool> offsets_w_nulls_flags = {true, false, true, true, true};
+    std::vector<bool> offsets_wo_nulls_flags = {true, true, true, true, true};

Review Comment:
   I don't think this one is required, you can just call `ArrayFromVector` 
without a third argument.



##########
python/pyarrow/array.pxi:
##########
@@ -2812,6 +2810,25 @@ cdef dict _array_classes = {
 }
 
 
+cdef inline shared_ptr[CBuffer] c_mask_inverted_from_obj(object mask, 
MemoryPool pool) except *:
+    """
+    Convert mask array obj to c_mask while also inverting to signify 1 for 
valid and 0 for null
+    """
+    cdef shared_ptr[CBuffer] c_mask
+    if mask is None:
+        c_mask = shared_ptr[CBuffer]()
+    elif isinstance(mask, Array):
+        if mask.type.id != Type_BOOL:
+            raise TypeError('Mask must be a pyarrow.Array of type boolean')
+        if mask.null_count != 0:
+            raise ValueError('Mask must not contain nulls')
+        inverted_mask = _pc().invert(mask, memory_pool=pool)
+        c_mask = pyarrow_unwrap_buffer(inverted_mask.buffers()[1])
+    else:
+        raise TypeError('Mask must be a pyarrow.Array of type bool')

Review Comment:
   ```suggestion
           raise TypeError('Mask must be a pyarrow.Array of type boolean')
   ```



##########
cpp/src/arrow/array/array_list_test.cc:
##########
@@ -197,6 +197,37 @@ class TestListArray : public ::testing::Test {
     EXPECT_FALSE(left->Slice(offset)->Equals(right->Slice(offset)));
   }
 
+  void TestFromArraysWithNullBitMap() {
+    std::shared_ptr<Array> offsets_w_nulls, offsets_wo_nulls, values;
+
+    std::vector<offset_type> offsets = {0, 1, 1, 3, 4};
+
+    std::vector<bool> offsets_w_nulls_flags = {true, false, true, true, true};

Review Comment:
   The convention in other tests is to call this `xxx_is_valid`:
   ```suggestion
       std::vector<bool> offsets_w_nulls_is_valid = {true, false, true, true, 
true};
   ```



##########
python/pyarrow/array.pxi:
##########
@@ -1902,6 +1902,7 @@ cdef class ListArray(BaseListArray):
             If not specified, a default ListType with the values' type is
             used.
         pool : MemoryPool
+        mask : Array (bool type)

Review Comment:
   I would spell it "boolean" (to avoid confusions with Python's `bool` type) 
and would also describe the argument a bit more:
   ```suggestion
           mask : Array (boolean type), optional
               ... (fill this :-))
   ```



##########
cpp/src/arrow/array/array_list_test.cc:
##########
@@ -539,6 +570,10 @@ TYPED_TEST(TestListArray, ValuesEquality) { 
this->TestValuesEquality(); }
 
 TYPED_TEST(TestListArray, FromArrays) { this->TestFromArrays(); }
 
+TYPED_TEST(TestListArray, TestFromArraysWithNullBitMap) {

Review Comment:
   Nit :-)
   ```suggestion
   TYPED_TEST(TestListArray, FromArraysWithNullBitMap) {
   ```



##########
cpp/src/arrow/array/array_nested.h:
##########
@@ -179,12 +187,15 @@ class ARROW_EXPORT LargeListArray : public 
BaseListArray<LargeListType> {
   /// \param[in] pool MemoryPool in case new offsets array needs to be

Review Comment:
   CI is complaining that you didn't document the new parameters here...
   
https://github.com/apache/arrow/runs/8012669293?check_suite_focus=true#step:6:2925



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to