Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19222#discussion_r176985334
  
    --- Diff: 
common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java ---
    @@ -49,49 +51,70 @@ public static int hashInt(int input, int seed) {
       }
     
       public int hashUnsafeWords(Object base, long offset, int lengthInBytes) {
    -    return hashUnsafeWords(base, offset, lengthInBytes, seed);
    +    return hashUnsafeWordsBlock(MemoryBlock.allocateFromObject(base, 
offset, lengthInBytes), seed);
       }
     
    -  public static int hashUnsafeWords(Object base, long offset, int 
lengthInBytes, int seed) {
    +  public static int hashUnsafeWordsBlock(MemoryBlock base, int seed) {
         // This is based on Guava's 
`Murmur32_Hasher.processRemaining(ByteBuffer)` method.
    +    int lengthInBytes = Ints.checkedCast(base.size());
         assert (lengthInBytes % 8 == 0): "lengthInBytes must be a multiple of 
8 (word-aligned)";
    -    int h1 = hashBytesByInt(base, offset, lengthInBytes, seed);
    +    int h1 = hashBytesByIntBlock(base, seed);
         return fmix(h1, lengthInBytes);
       }
     
    -  public static int hashUnsafeBytes(Object base, long offset, int 
lengthInBytes, int seed) {
    +  public static int hashUnsafeWords(Object base, long offset, int 
lengthInBytes, int seed) {
    +    // This is based on Guava's 
`Murmur32_Hasher.processRemaining(ByteBuffer)` method.
    +    assert (lengthInBytes % 8 == 0): "lengthInBytes must be a multiple of 
8 (word-aligned)";
    +    return hashUnsafeWordsBlock(MemoryBlock.allocateFromObject(base, 
offset, lengthInBytes), seed);
    +  }
    +
    +  public static int hashUnsafeBytesBlock(MemoryBlock base, int seed) {
         // This is not compatible with the original and other implementations.
         // But it is kept for backward compatibility with components that existed before 2.3.
    +    long offset = base.getBaseOffset();
    +    long lengthInBytes = base.size();
         assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
    -    int lengthAligned = lengthInBytes - lengthInBytes % 4;
    -    int h1 = hashBytesByInt(base, offset, lengthAligned, seed);
    -    for (int i = lengthAligned; i < lengthInBytes; i++) {
    -      int halfWord = Platform.getByte(base, offset + i);
    +    long lengthAligned = lengthInBytes - lengthInBytes % 4;
    +    int h1 = hashBytesByIntBlock(base.subBlock(0, lengthAligned), seed);
    +    for (long i = lengthAligned; i < lengthInBytes; i++) {
    +      int halfWord = base.getByte(offset + i);
           int k1 = mixK1(halfWord);
           h1 = mixH1(h1, k1);
         }
    -    return fmix(h1, lengthInBytes);
    +    return fmix(h1, Ints.checkedCast(lengthInBytes));
    +  }
    +
    +  public static int hashUnsafeBytes(Object base, long offset, int 
lengthInBytes, int seed) {
    +    return hashUnsafeBytesBlock(MemoryBlock.allocateFromObject(base, 
offset, lengthInBytes), seed);
       }
     
       public static int hashUnsafeBytes2(Object base, long offset, int 
lengthInBytes, int seed) {
    +    return hashUnsafeBytes2Block(MemoryBlock.allocateFromObject(base, 
offset, lengthInBytes), seed);
    +  }
    +
    +  public static int hashUnsafeBytes2Block(MemoryBlock base, int seed) {
         // This is compatible with the original and other implementations.
         // Use this method for new components after Spark 2.3.
    -    assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
    -    int lengthAligned = lengthInBytes - lengthInBytes % 4;
    -    int h1 = hashBytesByInt(base, offset, lengthAligned, seed);
    +    long offset = base.getBaseOffset();
    +    long lengthInBytes = base.size();
    --- End diff --
    
    ditto


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to