jtuglu1 commented on code in PR #18952:
URL: https://github.com/apache/druid/pull/18952#discussion_r2735110631
##########
processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java:
##########
@@ -300,35 +300,81 @@ protected int findBucket(
final int startBucket = keyHash % buckets;
int bucket = startBucket;
- outer:
+ // Pre-compute hash with used flag for comparison.
+ final int keyHashWithUsedFlag = Groupers.getUsedFlag(keyHash);
+ final int keyBufferPosition = keyBuffer.position();
+
while (true) {
final int bucketOffset = bucket * bucketSizeWithHash;
+ final int storedHashWithUsedFlag =
targetTableBuffer.getInt(bucketOffset);
- if ((targetTableBuffer.get(bucketOffset) & 0x80) == 0) {
+ if ((storedHashWithUsedFlag & 0x80000000) == 0) {
// Found unused bucket before finding our key
return allowNewBucket ? bucket : -1;
}
- for (int i = bucketOffset + HASH_SIZE, j = keyBuffer.position(); j <
keyBuffer.position() + keySize; i++, j++) {
- if (targetTableBuffer.get(i) != keyBuffer.get(j)) {
- bucket += 1;
- if (bucket == buckets) {
- bucket = 0;
- }
+ if (storedHashWithUsedFlag == keyHashWithUsedFlag &&
+ keysEqual(targetTableBuffer, bucketOffset + HASH_SIZE, keyBuffer,
keyBufferPosition, keySize)) {
+ // Found our key in a used bucket
+ return bucket;
+ }
- if (bucket == startBucket) {
- // Came back around to the start without finding a free slot, that
was a long trip!
- // Should never happen unless buckets == regrowthThreshold.
- return -1;
- }
+ // Move to next bucket (linear probing)
+ bucket += 1;
+ if (bucket == buckets) {
+ bucket = 0;
+ }
- continue outer;
- }
+ if (bucket == startBucket) {
+ // Came back around to the start without finding a free slot, that was
a long trip!
+ // Should never happen unless buckets == regrowthThreshold.
+ return -1;
}
+ }
+ }
- // Found our key in a used bucket
- return bucket;
+ /**
+ * Compare keys using long/int comparisons for better performance than
byte-by-byte.
+ */
+ private static boolean keysEqual(
+ final ByteBuffer tableBuffer,
+ int tableOffset,
+ final ByteBuffer keyBuffer,
+ int keyOffset,
+ int length
+ )
+ {
+ // Compare 8 bytes at a time
+ while (length >= Long.BYTES) {
Review Comment:
Maybe we can save a comparison by switching to a do/while loop since I
believe `length` will always be ≥ 8. This likely will not show up in the
benchmark, however. Unfortunately we cannot do something like `[[likely]]` in
Java I don't think.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]