[
https://issues.apache.org/jira/browse/ARROW-1828?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16263325#comment-16263325
]
ASF GitHub Bot commented on ARROW-1828:
---------------------------------------
wesm commented on a change in pull request #1350: ARROW-1828: [C++] Hash kernel
specialization for BooleanType
URL: https://github.com/apache/arrow/pull/1350#discussion_r152676160
##########
File path: cpp/src/arrow/compute/kernels/hash.cc
##########
@@ -368,6 +368,79 @@ class HashTableKernel<Type, Action,
enable_if_has_c_type<Type>> : public HashTab
HashDictionary<Type> dict_;
};
+// ----------------------------------------------------------------------
+// Hash table for boolean types
+
+template <typename Type, typename Action>
+class HashTableKernel<Type, Action, enable_if_boolean<Type>> : public
HashTable {
+ public:
+ HashTableKernel(const std::shared_ptr<DataType>& type, MemoryPool* pool)
+ : HashTable(type, pool) {
+ std::fill(table_, table_ + 2, kHashSlotEmpty);
+ }
+
+ Status Append(const ArrayData& arr) override {
+ auto action = static_cast<Action*>(this);
+
+ RETURN_NOT_OK(action->Reserve(arr.length));
+
+ internal::BitmapReader value_reader(arr.buffers[1]->data(), arr.offset,
arr.length);
+
+#define HASH_INNER_LOOP() \
+ if (slot == kHashSlotEmpty) { \
+ if (!Action::allow_expand) { \
+ throw HashException("Encountered new dictionary value"); \
+ } \
+ table_[j] = slot = static_cast<hash_slot_t>(dict_.size()); \
+ dict_.push_back(value); \
+ action->ObserveNotFound(slot); \
+ } else { \
+ action->ObserveFound(slot); \
+ }
+
+ if (arr.null_count != 0) {
+ internal::BitmapReader valid_reader(arr.buffers[0]->data(), arr.offset,
arr.length);
+ for (int64_t i = 0; i < arr.length; ++i) {
+ const bool is_null = valid_reader.IsNotSet();
+ const bool value = value_reader.IsSet();
+ const int j = value ? 1 : 0;
+ hash_slot_t slot = table_[j];
+ valid_reader.Next();
+ value_reader.Next();
+ if (is_null) {
+ action->ObserveNull();
+ continue;
+ }
+ HASH_INNER_LOOP();
+ }
+ } else {
+ for (int64_t i = 0; i < arr.length; ++i) {
+ const bool value = value_reader.IsSet();
+ const int j = value ? 1 : 0;
+ hash_slot_t slot = table_[j];
+ value_reader.Next();
+ HASH_INNER_LOOP();
+ }
+ }
Review comment:
The macro strategy used elsewhere doesn't quite work here because the
bit reader for the data has to be advanced. We can address this in a later
refactoring...
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> [C++] Implement hash kernel specialization for BooleanType
> ----------------------------------------------------------
>
> Key: ARROW-1828
> URL: https://issues.apache.org/jira/browse/ARROW-1828
> Project: Apache Arrow
> Issue Type: Improvement
> Components: C++
> Reporter: Wes McKinney
> Assignee: Wes McKinney
> Labels: pull-request-available
> Fix For: 0.8.0
>
>
> Follow up to ARROW-1559
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)