This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/main by this push:
     new a7bdbd6d OPENNLP-1612 - DownloadUtil should validate checksum on download (#658)
a7bdbd6d is described below

commit a7bdbd6de6146d3737fa3780a08fd9067a472052
Author: Richard Zowalla <[email protected]>
AuthorDate: Mon Oct 7 08:26:22 2024 +0200

    OPENNLP-1612 - DownloadUtil should validate checksum on download (#658)
---
 .../main/java/opennlp/tools/util/DownloadUtil.java | 74 +++++++++++++++++++---
 .../opennlp/tools/EnabledWhenCDNAvailable.java     |  4 +-
 2 files changed, 66 insertions(+), 12 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java b/opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java
index 244e8409..b0f979e5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java
@@ -28,7 +28,11 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.nio.file.StandardCopyOption;
+import java.security.DigestInputStream;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
 import java.util.ArrayList;
+import java.util.Formatter;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -83,13 +87,11 @@ public class DownloadUtil {
   /**
    * Triggers a download for the specified {@link DownloadUtil.ModelType}.
    *
-   * @param language The ISO language code of the requested model.
+   * @param language  The ISO language code of the requested model.
    * @param modelType The {@link DownloadUtil.ModelType type} of model.
-   * @param type The class of the resulting model.
-   * @param <T> The generic type which is a subclass of {@link BaseModel}.
-   *
+   * @param type      The class of the resulting model.
+   * @param <T>       The generic type which is a subclass of {@link BaseModel}.
    * @return A model instance of type {@link T}.
-   *
    * @throws IOException Thrown if IO errors occurred or the model is invalid.
    */
   public static <T extends BaseModel> T downloadModel(String language, ModelType modelType,
@@ -113,14 +115,12 @@ public class DownloadUtil {
    * if it does not already exist. If a model to be downloaded already
    * exists in that directory, the model will not be re-downloaded.
    *
-   * @param url The model's {@link URL}.
+   * @param url  The model's {@link URL}.
    * @param type The class of the resulting model {@link T}.
-   * @param <T> The generic type which is a subclass of {@link BaseModel}.
-   *
+   * @param <T>  The generic type which is a subclass of {@link BaseModel}.
    * @return A model instance of type {@link T}.
-   *
    * @throws IOException Thrown if the model cannot be downloaded.
-  */
+   */
   public static <T extends BaseModel> T downloadModel(URL url, Class<T> type) throws IOException {
 
     final Path homeDirectory = Paths.get(System.getProperty("user.home") + "/.opennlp/");
@@ -138,6 +138,9 @@ public class DownloadUtil {
         Files.copy(in, localFile, StandardCopyOption.REPLACE_EXISTING);
       }
 
+      validateModel(new URL(url + ".sha512"), localFile);
+
+
       logger.debug("Download complete.");
     }
 
@@ -148,6 +151,57 @@ public class DownloadUtil {
     }
   }
 
+  /**
+   * Validates the downloaded model against the SHA-512 checksum fetched from the given URL.
+   *
+   * @param sha512          the url to get the sha512 hash from; only the first line of that resource is read
+   * @param downloadedModel the model file whose SHA-512 digest is computed and compared
+   * @throws IOException thrown if the checksum could not be read or computed, or if it does not match
+   */
+  private static void validateModel(URL sha512, Path downloadedModel) throws 
IOException {
+    // Download SHA512 checksum file (only its first line is read)
+    String expectedChecksum;
+    try (BufferedReader reader = new BufferedReader(new 
InputStreamReader(sha512.openStream()))) {
+      expectedChecksum = reader.readLine(); // null when the checksum resource is empty
+
+      if (expectedChecksum != null) {
+        expectedChecksum = expectedChecksum.split("\\s")[0].trim(); // keep the text before the first whitespace; NOTE(review): a whitespace-only line yields an empty array, so [0] would throw - confirm this cannot occur
+      }
+    }
+
+    // Validate SHA512 checksum (case-insensitive comparison; a null expected value never matches)
+    final String actualChecksum = calculateSHA512(downloadedModel);
+    if (!actualChecksum.equalsIgnoreCase(expectedChecksum)) {
+      throw new IOException("SHA512 checksum validation failed. Expected: "
+          + expectedChecksum + ", but got: " + actualChecksum);
+    }
+  }
+
+  private static String calculateSHA512(Path file) throws IOException { // returns the SHA-512 digest of the file's bytes as a lowercase hex string
+    try {
+      MessageDigest digest = MessageDigest.getInstance("SHA-512");
+      try (InputStream fis = Files.newInputStream(file);
+           DigestInputStream dis = new DigestInputStream(fis, digest)) {
+        byte[] buffer = new byte[4096];
+        while (dis.read(buffer) != -1) {
+          // Intentionally empty: each read() passes the bytes through the DigestInputStream, which updates the digest
+        }
+      }
+      return byteArrayToHexString(digest.digest());
+    } catch (NoSuchAlgorithmException e) {
+      throw new IOException("SHA-512 algorithm not found", e); // wrapped (cause preserved) so the method only declares IOException
+    }
+  }
+
+  private static String byteArrayToHexString(byte[] bytes) { // hex-encodes the given bytes, two lowercase digits per byte
+    try (Formatter formatter = new Formatter()) {
+      for (byte b : bytes) {
+        formatter.format("%02x", b); // %02x: zero-padded, two-digit lowercase hexadecimal
+      }
+      return formatter.toString(); // read the accumulated output before try-with-resources closes the formatter
+    }
+  }
+
   @Internal
   static class DownloadParser {
 
diff --git a/opennlp-tools/src/test/java/opennlp/tools/EnabledWhenCDNAvailable.java b/opennlp-tools/src/test/java/opennlp/tools/EnabledWhenCDNAvailable.java
index fb3878b8..ccba1b5d 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/EnabledWhenCDNAvailable.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/EnabledWhenCDNAvailable.java
@@ -52,10 +52,10 @@ public @interface EnabledWhenCDNAvailable {
         final String host = annotation.hostname();
         try (Socket socket = new Socket()) {
           socket.connect(new InetSocketAddress(host, 80), TIMEOUT_MS);
-          return ConditionEvaluationResult.enabled("Resouce (CDN) reachable.");
+          return ConditionEvaluationResult.enabled("Resource (CDN) reachable.");
         } catch (IOException e) {
           // Unreachable, unresolvable or timeout
-          return ConditionEvaluationResult.disabled("Resouce (CDN) unreachable.");
+          return ConditionEvaluationResult.disabled("Resource (CDN) unreachable.");
         }
       }
       return ConditionEvaluationResult.enabled("Nothing annotated with DisabledWhenOffline.");

Reply via email to