bkietz commented on a change in pull request #10858:
URL: https://github.com/apache/arrow/pull/10858#discussion_r691462649
##########
File path: cpp/src/arrow/compute/exec/key_map.cc
##########
@@ -91,100 +94,197 @@ inline void SwissTable::search_block(uint64_t block, int stamp, int start_slot,
   *out_slot = static_cast<int>(CountLeadingZeros(matches | block_high_bits) >> 3);
 }
-// This call follows the call to search_block.
-// The input slot index is the output returned by it, which is a value from 0 to 8,
-// with 8 indicating that both: no match was found and there were no empty slots.
-//
-// If the slot corresponds to a non-empty slot return a group id associated with it.
-// Otherwise return any group id from any of the slots or
-// zero, which is the default value stored in empty slots.
-//
 inline uint64_t SwissTable::extract_group_id(const uint8_t* block_ptr, int slot,
-                                             uint64_t group_id_mask) {
-  // Input slot can be equal to 8, in which case we need to output any valid group id
-  // value, so we take the one from slot 0 in the block.
-  int clamped_slot = slot & 7;
-
+                                             uint64_t group_id_mask) const {
   // Group id values for all 8 slots in the block are bit-packed and follow the status
   // bytes. We assume here that the number of bits is rounded up to 8, 16, 32 or 64. In
   // that case we can extract group id using aligned 64-bit word access.
-  int num_groupid_bits = static_cast<int>(ARROW_POPCOUNT64(group_id_mask));
-  ARROW_DCHECK(num_groupid_bits == 8 || num_groupid_bits == 16 ||
-               num_groupid_bits == 32 || num_groupid_bits == 64);
+  int num_group_id_bits = static_cast<int>(ARROW_POPCOUNT64(group_id_mask));
+  ARROW_DCHECK(num_group_id_bits == 8 || num_group_id_bits == 16 ||
+               num_group_id_bits == 32 || num_group_id_bits == 64);
-  int bit_offset = clamped_slot * num_groupid_bits;
+  int bit_offset = slot * num_group_id_bits;
const uint64_t* group_id_bytes =
reinterpret_cast<const uint64_t*>(block_ptr) + 1 + (bit_offset >> 6);
uint64_t group_id = (*group_id_bytes >> (bit_offset & 63)) & group_id_mask;
return group_id;
}
-// Return global slot id (the index including the information about the block)
-// where the search should continue if the first comparison fails.
-// This function always follows search_block and receives the slot id returned by it.
-//
-inline uint64_t SwissTable::next_slot_to_visit(uint64_t block_index, int slot,
-                                               int match_found) {
- // The result should be taken modulo the number of all slots in all blocks,
- // but here we allow it to take a value one above the last slot index.
- // Modulo operation is postponed to later.
- return block_index * 8 + slot + match_found;
+template <typename T, bool use_selection>
+void SwissTable::extract_group_ids_imp(const int num_keys, const uint16_t* selection,
+                                       const uint32_t* hashes, const uint8_t* local_slots,
+                                       uint32_t* out_group_ids, int element_offset,
+                                       int element_multiplier) const {
+ const T* elements = reinterpret_cast<const T*>(blocks_) + element_offset;
+ if (log_blocks_ == 0) {
+ ARROW_DCHECK(sizeof(T) == sizeof(uint8_t));
+ for (int i = 0; i < num_keys; ++i) {
+ uint32_t id = use_selection ? selection[i] : i;
+ uint32_t group_id = blocks_[8 + local_slots[id]];
+ out_group_ids[id] = group_id;
+ }
+ } else {
+ for (int i = 0; i < num_keys; ++i) {
+ uint32_t id = use_selection ? selection[i] : i;
+ uint32_t hash = hashes[id];
+ int64_t pos =
+          (hash >> (bits_hash_ - log_blocks_)) * element_multiplier + local_slots[id];
+ uint32_t group_id = static_cast<uint32_t>(elements[pos]);
+ ARROW_DCHECK(group_id < num_inserted_ || num_inserted_ == 0);
+ out_group_ids[id] = group_id;
+ }
+ }
}
-// Implements first (fast-path, optimistic) lookup.
-// Searches for a match only within the start block and
-// trying only the first slot with a matching stamp.
-//
-// Comparison callback needed for match verification is done outside of this function.
-// Match bit vector filled by it only indicates finding a matching stamp in a slot.
+void SwissTable::extract_group_ids(const int num_keys, const uint16_t* optional_selection,
+                                   const uint32_t* hashes, const uint8_t* local_slots,
+                                   uint32_t* out_group_ids) const {
+  // Group id values for all 8 slots in the block are bit-packed and follow the status
+  // bytes. We assume here that the number of bits is rounded up to 8, 16, 32 or 64. In
+  // that case we can extract group id using aligned 64-bit word access.
+ int num_group_id_bits = num_groupid_bits_from_log_blocks(log_blocks_);
+ ARROW_DCHECK(num_group_id_bits == 8 || num_group_id_bits == 16 ||
+ num_group_id_bits == 32);
+
+ // Optimistically use simplified lookup involving only a start block to find
+ // a single group id candidate for every input.
+#if defined(ARROW_HAVE_AVX2)
+ int num_group_id_bytes = num_group_id_bits / 8;
+  if ((hardware_flags_ & arrow::internal::CpuInfo::AVX2) && !optional_selection) {
+    extract_group_ids_avx2(num_keys, hashes, local_slots, out_group_ids, sizeof(uint64_t),
+                           8 + 8 * num_group_id_bytes, num_group_id_bytes);
+ } else {
Review comment:
Nit: I think it's best to keep macro-guarded blocks self-contained
```suggestion
return;
}
```
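For context, a minimal standalone sketch (hypothetical names, not the code in this PR) of the structure the suggestion produces: the AVX2 branch returns inside the guard, so the scalar loop after the `#endif` serves as the fallback both when AVX2 is not compiled in and when the CPU lacks it at runtime.
```cpp
#include <cstdint>

// Hypothetical illustration only: keep the #if block self-contained and let
// the portable loop below act as the fallback path.
void ExtractIds(const uint32_t* in, uint32_t* out, int n, bool cpu_has_avx2) {
#if defined(ARROW_HAVE_AVX2)
  if (cpu_has_avx2) {
    // ... vectorized extraction would go here ...
    return;  // leave before reaching the scalar code
  }
#else
  (void)cpu_has_avx2;  // only consulted when the AVX2 path is compiled in
#endif
  for (int i = 0; i < n; ++i) {
    out[i] = in[i];  // scalar fallback
  }
}
```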
##########
File path: cpp/src/arrow/compute/exec/key_encode.cc
##########
@@ -864,6 +864,17 @@ void KeyEncoder::EncoderBinaryPair::Encode(uint32_t offset_within_row, KeyRowArr
     EncodeImp_fn[dispatch_const](num_processed, offset_within_row, rows, col_prep[0],
                                  col_prep[1]);
}
+
+ DCHECK(temp1->metadata().is_fixed_length);
+ DCHECK(temp1->length() * temp1->metadata().fixed_length >=
Review comment:
Please use the more specific macros for DCHECK and ARROW_CHECK statements
```suggestion
DCHECK_GE(temp1->length() * temp1->metadata().fixed_length,
```
There is a lint check against this, but it occasionally fails to parse in the presence of `operator->`.
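For illustration, a small hedged sketch (the helper function is hypothetical; `DCHECK`/`DCHECK_GE` come from `arrow/util/logging.h`) of why the comparison-specific macro is preferable: on failure it prints both operand values, while the plain form only reports that the expression was false.
```cpp
#include <cstdint>

#include "arrow/util/logging.h"

// Hypothetical helper, for illustration only.
void CheckBufferFitsRows(int64_t buffer_length, int64_t fixed_length, int64_t num_rows) {
  // Plain form: a debug-build failure only says the expression was false.
  DCHECK(buffer_length * fixed_length >= num_rows);
  // Comparison form: the failure message also includes both computed values.
  DCHECK_GE(buffer_length * fixed_length, num_rows);
}
```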
##########
File path: cpp/src/arrow/compute/exec/key_encode.cc
##########
@@ -1366,8 +1377,8 @@ void KeyEncoder::KeyRowMetadata::FromColumnMetadataVector(
   // a) Boolean column, marked with fixed-length 0, is considered to have fixed-length
   // part of 1 byte. b) Columns with fixed-length part being power of 2 or multiple of row
   // alignment precede other columns. They are sorted among themselves based on size of
-  // fixed-length part. c) Fixed-length columns precede varying-length columns when both
-  // have the same size fixed-length part.
+  // fixed-length part decreasing. c) Fixed-length columns precede varying-length columns
Review comment:
"They are sorted in decreasing order of the size of their fixed-length
part"