Github user kiszk commented on a diff in the pull request:
https://github.com/apache/spark/pull/19222#discussion_r171821972
--- Diff: common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java ---
@@ -87,6 +106,35 @@ public static int hashUnsafeBytes2(Object base, long offset, int lengthInBytes,
return fmix(h1, lengthInBytes);
}
+ public static int hashUnsafeBytes2Block(MemoryBlock base, int seed) {
+ // This is compatible with original and another implementations.
+ // Use this method for new components after Spark 2.3.
+ long offset = base.getBaseOffset();
+ int lengthInBytes = (int)base.size();
+ assert (lengthInBytes >= 0) : "lengthInBytes cannot be negative";
+ int lengthAligned = lengthInBytes - lengthInBytes % 4;
+ int h1 = hashBytesByIntBlock(base.subBlock(offset, lengthAligned), seed);
+ int k1 = 0;
+ for (int i = lengthAligned, shift = 0; i < lengthInBytes; i++, shift += 8) {
+ k1 ^= (base.getByte(offset + i) & 0xFF) << shift;
+ }
+ h1 ^= mixK1(k1);
+ return fmix(h1, lengthInBytes);
+ }
+
+ private static int hashBytesByIntBlock(MemoryBlock base, int seed) {
--- End diff --
ditto
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]