This is an automated email from the ASF dual-hosted git repository.

szetszwo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ratis.git


The following commit(s) were added to refs/heads/master by this push:
     new 1fd1349a8 RATIS-2361. Change MD5Hash to value-based. (#1316)
1fd1349a8 is described below

commit 1fd1349a891f6725cffd1eccff7f306bd33f474f
Author: Tsz-Wo Nicholas Sze <[email protected]>
AuthorDate: Thu Nov 20 07:43:41 2025 -0800

    RATIS-2361. Change MD5Hash to value-based. (#1316)
---
 ratis-common/dev-support/findbugsExcludeFile.xml   |   4 -
 .../src/main/java/org/apache/ratis/io/MD5Hash.java | 209 ++++++---------------
 .../java/org/apache/ratis/util/MD5FileUtil.java    |  29 ++-
 .../src/test/java/org/apache/ratis/io/TestMD5.java |  69 +++++++
 .../ratis/server/storage/FileChunkReader.java      |   4 +-
 .../ratis/server/storage/SnapshotManager.java      |  13 +-
 6 files changed, 150 insertions(+), 178 deletions(-)

diff --git a/ratis-common/dev-support/findbugsExcludeFile.xml 
b/ratis-common/dev-support/findbugsExcludeFile.xml
index 314875234..9267e763f 100644
--- a/ratis-common/dev-support/findbugsExcludeFile.xml
+++ b/ratis-common/dev-support/findbugsExcludeFile.xml
@@ -19,10 +19,6 @@
     <Class 
name="org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer$Builder" />
     <Bug pattern="EI_EXPOSE_REP2" />
   </Match>
-  <Match>
-    <Class name="org.apache.ratis.io.MD5Hash" />
-    <Bug pattern="CT_CONSTRUCTOR_THROW" />
-  </Match>
   <Match>
     <Class name="org.apache.ratis.protocol.GroupInfoReply" />
     <Bug pattern="EI_EXPOSE_REP2" />
diff --git a/ratis-common/src/main/java/org/apache/ratis/io/MD5Hash.java 
b/ratis-common/src/main/java/org/apache/ratis/io/MD5Hash.java
index 0d19feb93..71fd39f34 100644
--- a/ratis-common/src/main/java/org/apache/ratis/io/MD5Hash.java
+++ b/ratis-common/src/main/java/org/apache/ratis/io/MD5Hash.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -18,157 +18,73 @@
 
 package org.apache.ratis.io;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.InputStream;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-import java.util.Arrays;
-
-public class MD5Hash {
-  public static final int MD5_LEN = 16;
-
-  private static final ThreadLocal<MessageDigest> DIGESTER_FACTORY =
-      ThreadLocal.withInitial(MD5Hash::newDigester);
-
-  public static MessageDigest newDigester() {
-    try {
-      return MessageDigest.getInstance("MD5");
-    } catch (NoSuchAlgorithmException e) {
-      throw new IllegalStateException("Failed to create MessageDigest for 
MD5", e);
-    }
-  }
-
-  private byte[] digest;
-
-  /** Constructs an MD5Hash. */
-  public MD5Hash() {
-    this.digest = new byte[MD5_LEN];
-  }
-
-  /** Constructs an MD5Hash from a hex string. */
-  public MD5Hash(String hex) {
-    setDigest(hex);
-  }
-
-  /** Constructs an MD5Hash with a specified value. */
-  public MD5Hash(byte[] digest) {
-    if (digest.length != MD5_LEN) {
-      throw new IllegalArgumentException("Wrong length: " + digest.length);
-    }
-    this.digest = digest.clone();
-  }
-
-  public void readFields(DataInput in) throws IOException {
-    in.readFully(digest);
-  }
-
-  /** Constructs, reads and returns an instance. */
-  public static MD5Hash read(DataInput in) throws IOException {
-    MD5Hash result = new MD5Hash();
-    result.readFields(in);
-    return result;
-  }
-
-  public void write(DataOutput out) throws IOException {
-    out.write(digest);
-  }
-
-  /** Copy the contents of another instance into this instance. */
-  public void set(MD5Hash that) {
-    System.arraycopy(that.digest, 0, this.digest, 0, MD5_LEN);
-  }
-
-  /** Returns the digest bytes. */
-  public byte[] getDigest() {
-    return digest.clone();
-  }
-
-  /** Construct a hash value for a byte array. */
-  public static MD5Hash digest(byte[] data) {
-    return digest(data, 0, data.length);
-  }
-
-  /**
-   * Create a thread local MD5 digester
-   */
-  public static MessageDigest getDigester() {
-    MessageDigest digester = DIGESTER_FACTORY.get();
-    digester.reset();
-    return digester;
-  }
+import org.apache.ratis.util.MemoizedSupplier;
+import org.apache.ratis.util.Preconditions;
 
-  /** Construct a hash value for the content from the InputStream. */
-  public static MD5Hash digest(InputStream in) throws IOException {
-    final byte[] buffer = new byte[4*1024];
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.Supplier;
 
-    final MessageDigest digester = getDigester();
-    for(int n; (n = in.read(buffer)) != -1; ) {
-      digester.update(buffer, 0, n);
+/**
+ * A MD5 hash value.
+ * <p>
+ * This is a value-based class.
+ */
+public final class MD5Hash {
+  public static final int MD5_LENGTH = 16;
+
+  /** @return an instance with the given digest in a (case-insensitive) 
hexadecimals. */
+  public static MD5Hash newInstance(String digestHexadecimals) {
+    Objects.requireNonNull(digestHexadecimals, "digestHexadecimals == null");
+    Preconditions.assertSame(2 * MD5_LENGTH, digestHexadecimals.length(), 
"digestHexadecimals");
+
+    final byte[] digest = new byte[MD5_LENGTH];
+    for (int i = 0; i < MD5_LENGTH; i++) {
+      final int j = i << 1;
+      digest[i] = (byte) (charToNibble(digestHexadecimals, j) << 4 |
+          charToNibble(digestHexadecimals, j + 1));
     }
-
-    return new MD5Hash(digester.digest());
-  }
-
-  /** Construct a hash value for a byte array. */
-  public static MD5Hash digest(byte[] data, int start, int len) {
-    byte[] digest;
-    MessageDigest digester = getDigester();
-    digester.update(data, start, len);
-    digest = digester.digest();
     return new MD5Hash(digest);
   }
 
-  /** Construct a hash value for an array of byte array. */
-  public static MD5Hash digest(byte[][] dataArr, int start, int len) {
-    byte[] digest;
-    MessageDigest digester = getDigester();
-    for (byte[] data : dataArr) {
-      digester.update(data, start, len);
-    }
-    digest = digester.digest();
-    return new MD5Hash(digest);
+  /** @return an instance with the given digest. */
+  public static MD5Hash newInstance(byte[] digest) {
+    Objects.requireNonNull(digest, "digest == null");
+    Preconditions.assertSame(MD5_LENGTH, digest.length, "digest");
+    return new MD5Hash(digest.clone());
   }
 
-  /** Construct a half-sized version of this MD5.  Fits in a long **/
-  public long halfDigest() {
-    long value = 0;
-    for (int i = 0; i < 8; i++) {
-      value |= ((digest[i] & 0xffL) << (8*(7-i)));
-    }
-    return value;
+  private final byte[] digest;
+  private final Supplier<String> digestString;
+
+  private MD5Hash(byte[] digest) {
+    this.digest = digest;
+    this.digestString = MemoizedSupplier.valueOf(() -> digestToString(digest));
   }
 
-  /**
-   * Return a 32-bit digest of the MD5.
-   * @return the first 4 bytes of the md5
-   */
-  public int quarterDigest() {
-    int value = 0;
-    for (int i = 0; i < 4; i++) {
-      value |= ((digest[i] & 0xff) << (8*(3-i)));
-    }
-    return value;
+  /** @return the digest wrapped by a read-only {@link ByteBuffer}. */
+  public ByteBuffer getDigest() {
+    return ByteBuffer.wrap(digest).asReadOnlyBuffer();
   }
 
-  /** Returns true iff <code>o</code> is an MD5Hash whose digest contains the
-   * same values.  */
   @Override
-  public boolean equals(Object o) {
-    if (!(o instanceof MD5Hash)) {
+  public boolean equals(Object object) {
+    if (this == object) {
+      return true;
+    } else if (!(object instanceof MD5Hash)) {
       return false;
     }
-    MD5Hash other = (MD5Hash)o;
-    return Arrays.equals(this.digest, other.digest);
+    final MD5Hash that = (MD5Hash) object;
+    return Arrays.equals(this.digest, that.digest);
   }
 
-  /** Returns a hash code value for this object.
-   * Only uses the first 4 bytes, since md5s are evenly distributed.
-   */
   @Override
   public int hashCode() {
-    return quarterDigest();
+    return ((digest[0] & 0xFF) << 24)
+        |  ((digest[1] & 0xFF) << 16)
+        |  ((digest[2] & 0xFF) << 8)
+        |   (digest[3] & 0xFF);
   }
 
   private static final char[] HEX_DIGITS =
@@ -177,8 +93,12 @@ public class MD5Hash {
   /** Returns a string representation of this object. */
   @Override
   public String toString() {
-    StringBuilder buf = new StringBuilder(MD5_LEN*2);
-    for (int i = 0; i < MD5_LEN; i++) {
+    return digestString.get();
+  }
+
+  static String digestToString(byte[] digest) {
+    StringBuilder buf = new StringBuilder(MD5_LENGTH *2);
+    for (int i = 0; i < MD5_LENGTH; i++) {
       int b = digest[i];
       buf.append(HEX_DIGITS[(b >> 4) & 0xf]);
       buf.append(HEX_DIGITS[b & 0xf]);
@@ -186,20 +106,8 @@ public class MD5Hash {
     return buf.toString();
   }
 
-  /** Sets the digest value from a hex string. */
-  public void setDigest(String hex) {
-    if (hex.length() != MD5_LEN*2) {
-      throw new IllegalArgumentException("Wrong length: " + hex.length());
-    }
-    this.digest = new byte[MD5_LEN];
-    for (int i = 0; i < MD5_LEN; i++) {
-      int j = i << 1;
-      this.digest[i] = (byte)(charToNibble(hex.charAt(j)) << 4 |
-          charToNibble(hex.charAt(j+1)));
-    }
-  }
-
-  private static int charToNibble(char c) {
+  private static int charToNibble(String hexadecimals, int i) {
+    final char c = hexadecimals.charAt(i);
     if (c >= '0' && c <= '9') {
       return c - '0';
     } else if (c >= 'a' && c <= 'f') {
@@ -207,7 +115,8 @@ public class MD5Hash {
     } else if (c >= 'A' && c <= 'F') {
       return 0xA + (c - 'A');
     } else {
-      throw new RuntimeException("Not a hex character: " + c);
+      throw new IllegalArgumentException(
+          "Found a non-hexadecimal character '" + c + "' at index " + i + " in 
\"" + hexadecimals + "\"");
     }
   }
 }
diff --git a/ratis-common/src/main/java/org/apache/ratis/util/MD5FileUtil.java 
b/ratis-common/src/main/java/org/apache/ratis/util/MD5FileUtil.java
index 8a38f45e6..2c217b27d 100644
--- a/ratis-common/src/main/java/org/apache/ratis/util/MD5FileUtil.java
+++ b/ratis-common/src/main/java/org/apache/ratis/util/MD5FileUtil.java
@@ -18,8 +18,6 @@
 package org.apache.ratis.util;
 
 import org.apache.ratis.io.MD5Hash;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.BufferedReader;
 import java.io.File;
@@ -29,16 +27,21 @@ import java.nio.channels.FileChannel;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.StandardOpenOption;
 import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
 import java.util.Optional;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-public abstract class MD5FileUtil {
-  public static final Logger LOG = LoggerFactory.getLogger(MD5FileUtil.class);
+public final class MD5FileUtil {
+  private MD5FileUtil() {}
 
-  // TODO: we should provide something like Hadoop's checksum fs for the local 
filesystem
-  // so that individual state machines do not have to deal with 
checksumming/corruption prevention.
-  // Keep the checksum and data in the same block format instead of individual 
files.
+  public static MessageDigest newMD5() {
+    try {
+      return MessageDigest.getInstance("MD5");
+    } catch (NoSuchAlgorithmException e) {
+      throw new IllegalStateException("Failed to create MessageDigest for 
MD5", e);
+    }
+  }
 
   public static final String MD5_SUFFIX = ".md5";
   private static final String LINE_REGEX = "([0-9a-f]{32}) [ *](.+)";
@@ -105,7 +108,7 @@ public abstract class MD5FileUtil {
               referencedFile.getName() + " but we expected it to reference " +
               dataFile);
     }
-    return new MD5Hash(storedHash);
+    return MD5Hash.newInstance(storedHash);
   }
 
   /**
@@ -113,7 +116,7 @@ public abstract class MD5FileUtil {
    */
   public static MD5Hash computeMd5ForFile(File dataFile) throws IOException {
     final int bufferSize = SizeInBytes.ONE_MB.getSizeInt();
-    final MessageDigest digester = MD5Hash.getDigester();
+    final MessageDigest digester = newMD5();
     try (FileChannel in = FileUtils.newFileChannel(dataFile, 
StandardOpenOption.READ)) {
       final long fileSize = in.size();
       for (int offset = 0; offset < fileSize; ) {
@@ -122,7 +125,7 @@ public abstract class MD5FileUtil {
         offset += readSize;
       }
     }
-    return new MD5Hash(digester.digest());
+    return MD5Hash.newInstance(digester.digest());
   }
 
   public static MD5Hash computeAndSaveMd5ForFile(File dataFile) {
@@ -147,7 +150,7 @@ public abstract class MD5FileUtil {
    */
   public static void saveMD5File(File dataFile, MD5Hash digest)
       throws IOException {
-    final String digestString = 
StringUtils.bytes2HexString(digest.getDigest());
+    final String digestString = digest.toString();
     saveMD5File(dataFile, digestString);
   }
 
@@ -162,10 +165,6 @@ public abstract class MD5FileUtil {
     try (AtomicFileOutputStream afos = new AtomicFileOutputStream(md5File)) {
       afos.write(md5Line.getBytes(StandardCharsets.UTF_8));
     }
-
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("Saved MD5 " + digestString + " to " + md5File);
-    }
   }
 
   /**
diff --git a/ratis-common/src/test/java/org/apache/ratis/io/TestMD5.java 
b/ratis-common/src/test/java/org/apache/ratis/io/TestMD5.java
new file mode 100644
index 000000000..7e7c29b64
--- /dev/null
+++ b/ratis-common/src/test/java/org/apache/ratis/io/TestMD5.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ratis.io;
+
+import org.apache.ratis.util.StringUtils;
+import org.junit.jupiter.api.Test;
+
+import java.nio.ByteBuffer;
+import java.util.concurrent.ThreadLocalRandom;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+class TestMD5 {
+
+  @Test
+  void testMD5Hash() {
+    final byte[] digest = new byte[MD5Hash.MD5_LENGTH];
+    final ThreadLocalRandom random = ThreadLocalRandom.current();
+    
+    for(int i = 0; i < 1000; i++) {
+      random.nextBytes(digest);
+      final MD5Hash md5 = MD5Hash.newInstance(digest);
+
+      // test hashCode
+      final int expectedHashCode = oldQuarterDigest(digest);
+      assertEquals(expectedHashCode, md5.hashCode());
+
+      // test toString
+      final String expectedString = StringUtils.bytes2HexString(digest);
+      assertEquals(expectedString, md5.toString());
+      assertEquals(expectedString, MD5Hash.digestToString(digest));
+
+      // test newInstance(String)
+      assertEquals(md5, MD5Hash.newInstance(expectedString.toLowerCase()));
+      assertEquals(md5, MD5Hash.newInstance(expectedString.toUpperCase()));
+
+      // test getDigest
+      final ByteBuffer expectedByteBuffer = ByteBuffer.wrap(digest);
+      assertEquals(expectedByteBuffer, md5.getDigest());
+    }
+  }
+
+  /**
+   * Return a 32-bit digest of the MD5.
+   * @return the first 4 bytes of the md5
+   */
+  private static int oldQuarterDigest(byte[] digest) {
+    int value = 0;
+    for (int i = 0; i < 4; i++) {
+      value |= ((digest[i] & 0xff) << (8*(3-i)));
+    }
+    return value;
+  }
+}
\ No newline at end of file
diff --git 
a/ratis-server/src/main/java/org/apache/ratis/server/storage/FileChunkReader.java
 
b/ratis-server/src/main/java/org/apache/ratis/server/storage/FileChunkReader.java
index a12818443..b80924eef 100644
--- 
a/ratis-server/src/main/java/org/apache/ratis/server/storage/FileChunkReader.java
+++ 
b/ratis-server/src/main/java/org/apache/ratis/server/storage/FileChunkReader.java
@@ -17,13 +17,13 @@
  */
 package org.apache.ratis.server.storage;
 
-import org.apache.ratis.io.MD5Hash;
 import org.apache.ratis.proto.RaftProtos.FileChunkProto;
 import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
 import org.apache.ratis.thirdparty.com.google.protobuf.UnsafeByteOperations;
 import org.apache.ratis.util.FileUtils;
 import org.apache.ratis.util.IOUtils;
 import org.apache.ratis.util.JavaUtils;
+import org.apache.ratis.util.MD5FileUtil;
 
 import java.io.Closeable;
 import java.io.File;
@@ -56,7 +56,7 @@ public class FileChunkReader implements Closeable {
     this.relativePath = relativePath;
     final File f = info.getPath().toFile();
     if (info.getFileDigest() == null) {
-      digester = MD5Hash.newDigester();
+      digester = MD5FileUtil.newMD5();
       this.in = new DigestInputStream(FileUtils.newInputStream(f), digester);
     } else {
       digester = null;
diff --git 
a/ratis-server/src/main/java/org/apache/ratis/server/storage/SnapshotManager.java
 
b/ratis-server/src/main/java/org/apache/ratis/server/storage/SnapshotManager.java
index 2d10c53a4..a96001b59 100644
--- 
a/ratis-server/src/main/java/org/apache/ratis/server/storage/SnapshotManager.java
+++ 
b/ratis-server/src/main/java/org/apache/ratis/server/storage/SnapshotManager.java
@@ -87,7 +87,7 @@ public class SnapshotManager {
       }
       // create the temp snapshot file and put padding inside
       out = FileUtils.newFileChannel(tmpSnapshotFile, 
StandardOpenOption.WRITE, StandardOpenOption.CREATE);
-      digester = MD5Hash.newDigester();
+      digester = MD5FileUtil.newMD5();
     } else {
       if (!exists) {
         throw new FileNotFoundException("Chunk offset is non-zero but file is 
not found: " + tmpSnapshotFile
@@ -138,11 +138,10 @@ public class SnapshotManager {
       // rename the temp snapshot file if this is the last chunk. also verify
       // the md5 digest and create the md5 meta-file.
       if (chunk.getDone()) {
-        final MD5Hash expectedDigest =
-            new MD5Hash(chunk.getFileDigest().toByteArray());
+        final MD5Hash expectedDigest = 
MD5Hash.newInstance(chunk.getFileDigest().toByteArray());
         // calculate the checksum of the snapshot file and compare it with the
         // file digest in the request
-        final MD5Hash digest = new MD5Hash(digester.digest());
+        final MD5Hash digest = MD5Hash.newInstance(digester.digest());
         if (!digest.equals(expectedDigest)) {
           LOG.warn("The snapshot md5 digest {} does not match expected {}",
               digest, expectedDigest);
@@ -180,8 +179,8 @@ public class SnapshotManager {
       try {
         moved = FileUtils.move(stateMachineDir, TMP + 
StringUtils.currentDateTime());
       } catch(IOException e) {
-        LOG.warn("Failed to rename state machine directory " + 
stateMachineDir.getAbsolutePath()
-            + " to a " + TMP + " directory.  Try deleting it directly.", e);
+        LOG.warn("Failed to rename state machine directory {} to a " + TMP + " 
directory.  Try deleting it directly.",
+            stateMachineDir.getAbsolutePath(), e);
         FileUtils.deleteFully(stateMachineDir);
       }
       existingDir = moved;
@@ -202,7 +201,7 @@ public class SnapshotManager {
       try {
         FileUtils.deleteFully(existingDir);
       } catch (IOException e) {
-        LOG.warn("Failed to delete existing directory " + 
existingDir.getAbsolutePath(), e);
+        LOG.warn("Failed to delete existing directory {}", 
existingDir.getAbsolutePath(), e);
       }
     }
   }

Reply via email to