kou commented on code in PR #35655:
URL: https://github.com/apache/arrow/pull/35655#discussion_r1204872448


##########
matlab/src/cpp/arrow/matlab/array/proxy/array.cc:
##########
@@ -40,4 +43,25 @@ namespace arrow::matlab::array::proxy {
         auto length_mda = factory.createScalar(array->length());
         context.outputs[0] = length_mda;
     }
+
+    void Array::valid(libmexclass::proxy::method::Context& context) {
+        size_t array_length = static_cast<size_t>(array->length());
+        
+        // If the Arrow array has no null values, then return a MATLAB
+        // logical array that is all "true" for the validity bitmap.
+        if (array->null_count() == 0) {
+            ::matlab::data::ArrayFactory factory;
+            auto validity_buffer = factory.createBuffer<bool>(array_length);
+            auto validity_buffer_ptr = validity_buffer.get();
+            std::fill(validity_buffer_ptr, validity_buffer_ptr + array_length, 
true);
+            ::matlab::data::TypedArray<bool> valid_elements_mda = 
factory.createArrayFromBuffer<bool>({array_length, 1}, 
std::move(validity_buffer));
+            context.outputs[0] = valid_elements_mda;
+            return;
+        }
+
+        auto validity_bitmap = array->null_bitmap();
+        ::matlab::data::TypedArray<bool> valid_elements_mda = 
arrow::matlab::bit::bitUnpackArrowBuffer(validity_bitmap, array_length);

Review Comment:
   Ditto.



##########
matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h:
##########
@@ -68,12 +81,11 @@ class NumericArray : public 
arrow::matlab::array::proxy::Array {
                 auto data_buffer = 
std::make_shared<arrow::Buffer>(reinterpret_cast<const uint8_t*>(dt),
                                                               sizeof(CType) * 
numeric_mda.getNumberOfElements());
 
-                // TODO: Implement null support
-                std::shared_ptr<arrow::Buffer> null_buffer = nullptr;
+                // Pack the validity bitmap values.
+                auto packed_validity_bitmap = has_validity_bitmap ? 
arrow::matlab::bit::bitPackMatlabLogicalArray(constructor_arguments[2]).ValueOrDie()
 : nullptr;

Review Comment:
   In general, we should not call `ValueOrDie()` without checking `ok()` in 
library. If `!ok()`, an user program is aborted.
   
   Can we raise an exception instead of abort on error?
   (This may be a follow-up task.)



##########
matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h:
##########
@@ -26,11 +26,19 @@
 #include "arrow/type_traits.h"
 
 #include "arrow/matlab/array/proxy/array.h"
+#include "arrow/matlab/bit/bit_pack_matlab_logical_array.h"
 
 #include "libmexclass/proxy/Proxy.h"
 
 namespace arrow::matlab::array::proxy {
 
+namespace {
+const uint8_t* getUnpackedValidityBitmap(const 
::matlab::data::TypedArray<bool>& valid_elements) {
+    const auto valid_elements_iterator(valid_elements.cbegin());
+    return reinterpret_cast<const 
uint8_t*>(valid_elements_iterator.operator->());

Review Comment:
   Wow!
   
   `::matlab::data::TypedArray` doesn't provide a method that returns it's raw 
data, right?



##########
matlab/src/cpp/arrow/matlab/array/proxy/array.cc:
##########
@@ -40,4 +43,25 @@ namespace arrow::matlab::array::proxy {
         auto length_mda = factory.createScalar(array->length());
         context.outputs[0] = length_mda;
     }
+
+    void Array::valid(libmexclass::proxy::method::Context& context) {
+        size_t array_length = static_cast<size_t>(array->length());
+        
+        // If the Arrow array has no null values, then return a MATLAB
+        // logical array that is all "true" for the validity bitmap.
+        if (array->null_count() == 0) {
+            ::matlab::data::ArrayFactory factory;
+            auto validity_buffer = factory.createBuffer<bool>(array_length);
+            auto validity_buffer_ptr = validity_buffer.get();
+            std::fill(validity_buffer_ptr, validity_buffer_ptr + array_length, 
true);
+            ::matlab::data::TypedArray<bool> valid_elements_mda = 
factory.createArrayFromBuffer<bool>({array_length, 1}, 
std::move(validity_buffer));

Review Comment:
   Can we use `auto` here?
   
   ```suggestion
               auto valid_elements_mda = 
factory.createArrayFromBuffer<bool>({array_length, 1}, 
std::move(validity_buffer));
   ```



##########
matlab/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.cc:
##########
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cmath> // std::ceil
+
+#include <arrow/util/bit_util.h>
+#include <arrow/util/bitmap_generate.h>
+
+#include "arrow/matlab/bit/bit_pack_matlab_logical_array.h"
+
+namespace arrow::matlab::bit {
+
+    // Calculate the number of bytes required in the bit-packed validity 
buffer.
+    int64_t bitPackedLength(int64_t num_elements) {
+        // Since MATLAB logical values are encoded using a full byte (8 bits),
+        // we can divide the number of elements in the logical array by 8 to 
get
+        // the bit packed length.
+        return static_cast<int64_t>(std::ceil(num_elements / 8.0));
+    }
+
+    // Pack an unpacked MATLAB logical array into into a bit-packed 
arrow::Buffer.
+    arrow::Result<std::shared_ptr<arrow::Buffer>> 
bitPackMatlabLogicalArray(const ::matlab::data::TypedArray<bool> 
matlab_logical_array) {
+        // Validate that the input arrow::Buffer has sufficient size to store 
a full bit-packed
+        // representation of the input MATLAB logical array.
+        const auto unpacked_buffer_length = 
matlab_logical_array.getNumberOfElements();
+
+        // Compute the bit packed length from the unpacked length.
+        const auto packed_buffer_length = 
bitPackedLength(unpacked_buffer_length);
+
+        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::ResizableBuffer> 
packed_validity_bitmap_buffer,  
arrow::AllocateResizableBuffer(packed_buffer_length));
+
+        // Get pointers to the internal uint8_t arrays behind arrow::Buffer 
and mxArray
+        // Get raw bool array pointer from MATLAB logical array.
+        // Get an iterator to the raw bool data behind the MATLAB logical 
array.
+        auto unpacked_bool_data_iterator = matlab_logical_array.cbegin();
+
+        // Iterate over the mxLogical array and write bit-packed bools to the 
arrow::Buffer.
+        // Call into a loop-unrolled Arrow utility for better performance when 
bit-packing.
+        auto generator = [&]() -> bool { return 
*(unpacked_bool_data_iterator++); };
+        const int64_t start_offset = 0;
+
+        auto mutable_data = packed_validity_bitmap_buffer->mutable_data();
+
+        arrow::internal::GenerateBitsUnrolled(mutable_data, start_offset, 
unpacked_buffer_length, generator);
+
+        auto buffer_result = 
std::static_pointer_cast<arrow::Buffer>(packed_validity_bitmap_buffer);
+        return arrow::Result<std::shared_ptr<arrow::Buffer>>{buffer_result};

Review Comment:
   Can we simplify this?
   
   * `std::shared_ptr<arrow::ResizableBuffer> packed_validity_bitmap_buffer` -> 
`auto packed_validity_bitmap_buffer`
   *  `auto buffer_result = 
std::static_pointer_cast<arrow::Buffer>(packed_validity_bitmap_buffer); return 
arrow::Result<std::shared_ptr<arrow::Buffer>>{buffer_result};` -> `return 
packed_validity_bitmap_buffer`
   
   ```suggestion
           ARROW_ASSIGN_OR_RAISE(auto packed_validity_bitmap_buffer,  
arrow::AllocateResizableBuffer(packed_buffer_length));
   
           // Get pointers to the internal uint8_t arrays behind arrow::Buffer 
and mxArray
           // Get raw bool array pointer from MATLAB logical array.
           // Get an iterator to the raw bool data behind the MATLAB logical 
array.
           auto unpacked_bool_data_iterator = matlab_logical_array.cbegin();
   
           // Iterate over the mxLogical array and write bit-packed bools to 
the arrow::Buffer.
           // Call into a loop-unrolled Arrow utility for better performance 
when bit-packing.
           auto generator = [&]() -> bool { return 
*(unpacked_bool_data_iterator++); };
           const int64_t start_offset = 0;
   
           auto mutable_data = packed_validity_bitmap_buffer->mutable_data();
   
           arrow::internal::GenerateBitsUnrolled(mutable_data, start_offset, 
unpacked_buffer_length, generator);
   
           return packed_validity_bitmap_buffer;
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to