alkis commented on code in PR #14049: URL: https://github.com/apache/arrow/pull/14049#discussion_r1125751843
########## cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.h: ########## @@ -0,0 +1,513 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <cstdint> +#include "arrow/compute/exec/util.h" +#include "arrow/util/bit_util.h" + +namespace arrow { +namespace compute { + +// Storage for a bit vector to be used with BitVectorNavigator and its variants. +// +// Supports weaved bit vectors. +// +class BitVectorWithCountsBase { + template <bool T> + friend class BitVectorNavigatorImp; + + public: + BitVectorWithCountsBase() : num_children_(0), num_bits_per_child_(0) {} + + void Resize(int64_t num_bits_per_child, int64_t num_children = 1) { + ARROW_DCHECK(num_children > 0 && num_bits_per_child > 0); + num_children_ = num_children; + num_bits_per_child_ = num_bits_per_child; + int64_t num_words = + bit_util::CeilDiv(num_bits_per_child, kBitsPerWord) * num_children; + bits_.resize(num_words); + mid_counts_.resize(num_words); + int64_t num_blocks = + bit_util::CeilDiv(num_bits_per_child, kBitsPerBlock) * num_children; + top_counts_.resize(num_blocks); + } + + void ClearBits() { memset(bits_.data(), 0, bits_.size() * sizeof(bits_[0])); } + + // Word is 64 adjacent bits + // + static constexpr int64_t kBitsPerWord = 64; + // Block is 65536 adjacent bits + // (that means that 16-bit counters can be used within the block) + // +#ifndef NDEBUG + static constexpr int kLogBitsPerBlock = 7; +#else + static constexpr int kLogBitsPerBlock = 16; +#endif + static constexpr int64_t kBitsPerBlock = 1LL << kLogBitsPerBlock; + + protected: + int64_t num_children_; + int64_t num_bits_per_child_; + // TODO: Replace vectors with ResizableBuffers. Return error status from + // Resize on out-of-memory. + // + std::vector<uint64_t> bits_; + std::vector<int64_t> top_counts_; + std::vector<uint16_t> mid_counts_; +}; + +template <bool SINGLE_CHILD_BIT_VECTOR> +class BitVectorNavigatorImp { + public: + BitVectorNavigatorImp() : container_(NULLPTR) {} + + BitVectorNavigatorImp(BitVectorWithCountsBase* container, int64_t child_index) + : container_(container), child_index_(child_index) {} + + int64_t block_count() const { + return bit_util::CeilDiv(container_->num_bits_per_child_, Review Comment: `CeilDiv` takes arguments as signed integers so it ends up generating suboptimal code. Because of the above, the codegen here and below has unnecessary branches: https://godbolt.org/z/6Me84646P -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
