emkornfield commented on a change in pull request #6985: URL: https://github.com/apache/arrow/pull/6985#discussion_r419855848
########## File path: cpp/src/arrow/util/bit_util.h ########## @@ -610,6 +618,71 @@ class FirstTimeBitmapWriter { } } + /// Appends number_of_bits from word to valid_bits and valid_bits_offset. + /// + /// \param[in] word The LSB bitmap to append. Any bits past number_of_bits are assumed + /// to be unset (i.e. 0). + /// \param[in] number_of_bits The number of bits to append from word. + void AppendWord(uint64_t word, int64_t number_of_bits) { +#if defined(ARROW_LITTLE_ENDIAN) + if (ARROW_PREDICT_FALSE(number_of_bits == 0)) { + return; + } + + // Location that the first byte needs to be written to. + uint8_t* append_position = bitmap_ + byte_offset_; + + // Update state variables except for current_byte_ here. + position_ += number_of_bits; + int64_t bit_offset = BitUtil::CountTrailingZeros(static_cast<uint32_t>(bit_mask_)); + bit_mask_ = BitUtil::kBitmask[(bit_offset + number_of_bits) % 8]; + byte_offset_ += (bit_offset + number_of_bits) / 8; + + if (bit_offset != 0) { + // We are in the middle of the byte. This code updates the byte and shifts + // bits appropriately within word so it can be memcpy'd below. + int64_t bits_to_carry = 8 - bit_offset; + // Carry over bits from word to current_byte_. We assume any extra bits in word + // unset so no additional accounting is needed for when number_of_bits < + // bits_to_carry. + current_byte_ |= (word & BitUtil::kPrecedingBitmask[bits_to_carry]) << bit_offset; + // Check if everything is transfered into current_byte_. + if (ARROW_PREDICT_FALSE(number_of_bits < bits_to_carry)) { + return; + } + *append_position = current_byte_; + append_position++; + // Move the carry bits off of word. + word = word >> bits_to_carry; + number_of_bits -= bits_to_carry; + } + + int64_t bytes_for_word = ::arrow::BitUtil::BytesForBits(number_of_bits); + std::memcpy(append_position, &word, bytes_for_word); + // At this point, the previous current_byte_ has been written to bitmap_. + // The new current_byte_ is either the last relevant byte in 'word' + // or cleared if the new position is byte aligned (i.e. a fresh byte). + if (bit_mask_ == 0x1) { + current_byte_ = 0; + } else { + // current_byte_ = *(reinterpret_cast<uint8_t*>(&word) + bytes_for_word - 1) : 0; + current_byte_ = *(append_position + bytes_for_word - 1); + } + +#else // big-endian + for (int x = 0; x < number_of_bits; x++) { Review comment: maybe I should just remove this since I can't verify? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org