kou commented on code in PR #38681:
URL: https://github.com/apache/arrow/pull/38681#discussion_r1390549940
##########
cpp/src/arrow/array/array_dict.cc:
##########
@@ -212,6 +212,57 @@ Result<std::shared_ptr<ArrayData>> TransposeDictIndices(
return out_data;
}
+struct CountDictionaryArrayNullValuesVistor {
+ const std::shared_ptr<ArrayData>& data;
+ int64_t& out_null_count;
+
+ template <typename IndexArrowType>
+ Status CountDictionaryArrayNullValuesImpl() {
+ auto index_length = data->length;
+ auto dict_length = data->dictionary->length;
+ const auto* dictionary_null_bit_map = data->dictionary->GetValues<uint8_t>(0);
+
+ using CType = typename IndexArrowType::c_type;
+ const CType* indices_data = data->GetValues<CType>(1);
+ CType dict_len = static_cast<CType>(dict_length);
+ for (int64_t i = 0; i < index_length; i++) {
+ if (data->IsNull(i)) {
+ out_null_count++;
+ continue;
+ }
+
+ CType current_index = indices_data[i];
+ if (current_index < 0 || current_index >= dict_len) {
+ return Status::IndexError(
+ "Index out of bounds while counting dictionary array: ", current_index,
+ "(dictionary is ", dict_length, " long) at position ", i);
Review Comment:
```suggestion
" (dictionary is ", dict_length, " long) at position ", i);
```
##########
cpp/src/arrow/array/array_dict.cc:
##########
@@ -323,6 +374,15 @@ Result<std::shared_ptr<Array>> DictionaryArray::Transpose(
return MakeArray(std::move(transposed));
}
+Result<int64_t> DictionaryArray::CountNullValues() const {
+ if (this->dictionary()->null_count() == 0 || this->indices()->length() == 0) {
+ return this->indices()->null_count();
+ }
+
+ ARROW_ASSIGN_OR_RAISE(int64_t null_count, CountDictionaryArrayNullValues(data_));
+ return null_count;
Review Comment:
```suggestion
return CountDictionaryArrayNullValues(data_);
```
##########
cpp/src/arrow/array/array_dict.cc:
##########
@@ -212,6 +212,57 @@ Result<std::shared_ptr<ArrayData>> TransposeDictIndices(
return out_data;
}
+struct CountDictionaryArrayNullValuesVistor {
+ const std::shared_ptr<ArrayData>& data;
+ int64_t& out_null_count;
+
+ template <typename IndexArrowType>
+ Status CountDictionaryArrayNullValuesImpl() {
+ auto index_length = data->length;
+ auto dict_length = data->dictionary->length;
+ const auto* dictionary_null_bit_map = data->dictionary->GetValues<uint8_t>(0);
+
+ using CType = typename IndexArrowType::c_type;
+ const CType* indices_data = data->GetValues<CType>(1);
+ CType dict_len = static_cast<CType>(dict_length);
+ for (int64_t i = 0; i < index_length; i++) {
+ if (data->IsNull(i)) {
+ out_null_count++;
+ continue;
+ }
+
+ CType current_index = indices_data[i];
+ if (current_index < 0 || current_index >= dict_len) {
+ return Status::IndexError(
+ "Index out of bounds while counting dictionary array: ", current_index,
+ "(dictionary is ", dict_length, " long) at position ", i);
+ }
+ if (!bit_util::GetBit(dictionary_null_bit_map, current_index)) {
+ out_null_count++;
+ }
+ }
+ return Status::OK();
+ }
+
+ template <typename Type>
+ enable_if_integer<Type, Status> Visit(const Type&) {
+ return CountDictionaryArrayNullValuesImpl<Type>();
+ }
+
+ Status Visit(const DataType& type) {
+ return Status::TypeError("Expected an Index Type of Int or UInt, but got a type:",
Review Comment:
```suggestion
return Status::TypeError("Expected an Index Type of Int or UInt, but got a type: ",
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]