kou commented on code in PR #38681:
URL: https://github.com/apache/arrow/pull/38681#discussion_r1390472002


##########
cpp/src/arrow/array/array_dict_test.cc:
##########
@@ -1428,6 +1428,47 @@ TEST(TestDictionary, IndicesArray) {
   ASSERT_OK(arr->indices()->ValidateFull());
 }
 
+void CheckDictionaryCountNullValues(const std::shared_ptr<DataType>& dict_type,
+                                    const std::string& input_dictionary_json,
+                                    const std::string& input_index_json,
+                                    const int64_t& expected_null_count) {
+  auto input = DictArrayFromJSON(dict_type, input_index_json, 
input_dictionary_json);
+  const DictionaryArray& input_ref = checked_cast<const 
DictionaryArray&>(*input);

Review Comment:
   ```suggestion
     const auto& input_ref = checked_cast<const DictionaryArray&>(*input);
   ```



##########
cpp/src/arrow/array/array_dict.cc:
##########
@@ -212,6 +212,56 @@ Result<std::shared_ptr<ArrayData>> TransposeDictIndices(
   return out_data;
 }
 
+struct CountDictionaryNullValuesVistor {
+  const std::shared_ptr<ArrayData>& data;
+  int64_t& out_null_count;
+
+  template <typename IndexArrowType>
+  Status CountDictionaryNullValuesImpl() {
+    int64_t index_length = data->length;
+    int64_t dict_length = data->dictionary->length;
+    const uint8_t* dictionary_null_bit_map = 
data->dictionary->GetValues<uint8_t>(0);
+
+    using CType = typename IndexArrowType::c_type;
+    const CType* indices_data = data->GetValues<CType>(1);
+    CType dict_len = static_cast<CType>(dict_length);
+    for (int64_t i = 0; i < index_length; i++) {
+      if (data->IsNull(i)) {
+        continue;

Review Comment:
   How about counting index nulls here too and returning 
`CountDictionaryNullValues()` result directly in 
`DictionaryArray::CountNullValues()`?
   
   ```suggestion
           out_null_count++;
           continue;
   ```



##########
cpp/src/arrow/array/array_dict.cc:
##########
@@ -212,6 +212,56 @@ Result<std::shared_ptr<ArrayData>> TransposeDictIndices(
   return out_data;
 }
 
+struct CountDictionaryNullValuesVistor {
+  const std::shared_ptr<ArrayData>& data;
+  int64_t& out_null_count;
+
+  template <typename IndexArrowType>
+  Status CountDictionaryNullValuesImpl() {
+    int64_t index_length = data->length;
+    int64_t dict_length = data->dictionary->length;
+    const uint8_t* dictionary_null_bit_map = 
data->dictionary->GetValues<uint8_t>(0);
+
+    using CType = typename IndexArrowType::c_type;
+    const CType* indices_data = data->GetValues<CType>(1);
+    CType dict_len = static_cast<CType>(dict_length);
+    for (int64_t i = 0; i < index_length; i++) {
+      if (data->IsNull(i)) {
+        continue;
+      }
+
+      CType current_index = indices_data[i];
+      if (current_index < 0 || current_index >= dict_len) {
+        return Status::IndexError(
+            "Index out of bounds while counting dictionary array: ", 
current_index,
+            "(dictionary is ", dict_length, " long) at position ", i);
+      }
+      if (!bit_util::GetBit(dictionary_null_bit_map, current_index)) {
+        out_null_count++;
+      }
+    }
+    return Status::OK();
+  }
+
+  template <typename Type>
+  enable_if_integer<Type, Status> Visit(const Type&) {
+    return CountDictionaryNullValuesImpl<Type>();
+  }
+
+  Status Visit(const DataType& type) {
+    return Status::TypeError("Expected an Index Type of Int or UInt");

Review Comment:
   How about showing the actual type?
   
   ```suggestion
       return Status::TypeError("Expected an Index Type of Int or UInt: ", 
type);
   ```



##########
cpp/src/arrow/array/array_dict.cc:
##########
@@ -212,6 +212,56 @@ Result<std::shared_ptr<ArrayData>> TransposeDictIndices(
   return out_data;
 }
 
+struct CountDictionaryNullValuesVistor {
+  const std::shared_ptr<ArrayData>& data;
+  int64_t& out_null_count;
+
+  template <typename IndexArrowType>
+  Status CountDictionaryNullValuesImpl() {
+    int64_t index_length = data->length;
+    int64_t dict_length = data->dictionary->length;
+    const uint8_t* dictionary_null_bit_map = 
data->dictionary->GetValues<uint8_t>(0);

Review Comment:
   ```suggestion
       auto index_length = data->length;
       auto dict_length = data->dictionary->length;
       const auto* dictionary_null_bit_map = 
data->dictionary->GetValues<uint8_t>(0);
   ```



##########
cpp/src/arrow/array/array_dict_test.cc:
##########
@@ -1428,6 +1428,47 @@ TEST(TestDictionary, IndicesArray) {
   ASSERT_OK(arr->indices()->ValidateFull());
 }
 
+void CheckDictionaryCountNullValues(const std::shared_ptr<DataType>& dict_type,
+                                    const std::string& input_dictionary_json,
+                                    const std::string& input_index_json,
+                                    const int64_t& expected_null_count) {
+  auto input = DictArrayFromJSON(dict_type, input_index_json, 
input_dictionary_json);
+  const DictionaryArray& input_ref = checked_cast<const 
DictionaryArray&>(*input);
+
+  ASSERT_OK_AND_ASSIGN(int64_t actual, input_ref.CountNullValues());

Review Comment:
   ```suggestion
     ASSERT_OK_AND_ASSIGN(auto actual, input_ref.CountNullValues());
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to