This is an automated email from the ASF dual-hosted git repository.

rzo1 pushed a commit to branch experimental/download_util_enhancements
in repository https://gitbox.apache.org/repos/asf/opennlp.git

commit 5ecc81eabc0bec77a96efdef2b96007d724996d4
Author: Richard Zowalla <[email protected]>
AuthorDate: Tue Nov 26 20:10:16 2024 +0100

    x
---
 .../main/java/opennlp/tools/util/DownloadUtil.java | 34 ++++++-----
 .../java/opennlp/tools/util/DownloadUtilTest.java  | 65 +++++++++++-----------
 2 files changed, 53 insertions(+), 46 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java b/opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java
index 76fb7bc6..57895c38 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java
@@ -18,6 +18,7 @@
 package opennlp.tools.util;
 
 import java.io.BufferedReader;
+import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
@@ -32,6 +33,7 @@ import java.security.DigestInputStream;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Formatter;
 import java.util.HashMap;
 import java.util.List;
@@ -75,15 +77,7 @@ public class DownloadUtil {
  private static final String BASE_URL = "https://dlcdn.apache.org/opennlp/";
   private static final String MODELS_UD_MODELS_1_2 = "models/ud-models-1.2/";
 
-  public static final Map<String, Map<ModelType, String>> available_models;
-
-  static {
-    try {
-      available_models = new DownloadParser(new URL(BASE_URL + 
MODELS_UD_MODELS_1_2)).getAvailableModels();
-    } catch (MalformedURLException e) {
-      throw new RuntimeException(e);
-    }
-  }
+  private static Map<String, Map<ModelType, String>> availableModels;
 
   /**
    * Triggers a download for the specified {@link DownloadUtil.ModelType}.
@@ -98,8 +92,8 @@ public class DownloadUtil {
   public static <T extends BaseModel> T downloadModel(String language, 
ModelType modelType,
                                                       Class<T> type) throws 
IOException {
 
-    if (available_models.containsKey(language)) {
-      final String url = (available_models.get(language).get(modelType));
+    if (getAvailableModels().containsKey(language)) {
+      final String url = (getAvailableModels().get(language).get(modelType));
       if (url != null) {
         return downloadModel(new URL(url), type);
       }
@@ -124,12 +118,12 @@ public class DownloadUtil {
    */
   public static <T extends BaseModel> T downloadModel(URL url, Class<T> type) 
throws IOException {
 
-    final Path homeDirectory = Paths.get(System.getProperty("user.home") + 
"/.opennlp/");
+    final Path homeDirectory = 
Paths.get(System.getProperty("user.home")).resolve(".opennlp");
     if (!Files.isDirectory(homeDirectory)) {
       homeDirectory.toFile().mkdir();
     }
 
-    final String filename = 
url.toString().substring(url.toString().lastIndexOf("/") + 1);
+    final String filename = 
url.toString().substring(url.toString().lastIndexOf(File.separator) + 1);
     final Path localFile = Paths.get(homeDirectory.toString(), filename);
 
     if (!Files.exists(localFile)) {
@@ -141,8 +135,9 @@ public class DownloadUtil {
 
       validateModel(new URL(url + ".sha512"), localFile);
 
-
       logger.debug("Download complete.");
+    } else {
+      logger.debug("Model file '{}' already exists. Skipping download.", 
filename);
     }
 
     try {
@@ -152,6 +147,17 @@ public class DownloadUtil {
     }
   }
 
+  public static Map<String, Map<ModelType, String>> getAvailableModels() {
+    if(availableModels == null) {
+      try {
+        availableModels = new DownloadParser(new URL(BASE_URL + 
MODELS_UD_MODELS_1_2)).getAvailableModels();
+      } catch (MalformedURLException e) {
+        throw new RuntimeException(e);
+      }
+    }
+    return Collections.unmodifiableMap(availableModels);
+  }
+
   /**
    * Validates the downloaded model.
    *
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilTest.java
index 6ab0aa4c..ae6f13e3 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilTest.java
@@ -104,6 +104,7 @@ public class DownloadUtilTest {
   @EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org")
   public void testDownloadModelByURL(String language, URL url) throws 
IOException {
     TokenizerModel model = DownloadUtil.downloadModel(url, 
TokenizerModel.class);
+    System.out.println(url);
     assertNotNull(model);
     assertEquals(language, model.getLanguage());
     assertTrue(model.isLoadedFromSerialized());
@@ -123,38 +124,38 @@ public class DownloadUtilTest {
   // Note: This needs to be public as JUnit 5 requires it like this.
   public static Stream<Arguments> provideURLs() {
     return Stream.of(
-            Arguments.of("en", 
DownloadUtil.available_models.get("en").get(MT_TOKENIZER)),
-            Arguments.of("fr", 
DownloadUtil.available_models.get("fr").get(MT_TOKENIZER)),
-            Arguments.of("de", 
DownloadUtil.available_models.get("de").get(MT_TOKENIZER)),
-            Arguments.of("it", 
DownloadUtil.available_models.get("it").get(MT_TOKENIZER)),
-            Arguments.of("nl", 
DownloadUtil.available_models.get("nl").get(MT_TOKENIZER)),
-            Arguments.of("bg", 
DownloadUtil.available_models.get("bg").get(MT_TOKENIZER)),
-            Arguments.of("ca", 
DownloadUtil.available_models.get("ca").get(MT_TOKENIZER)),
-            Arguments.of("cs", 
DownloadUtil.available_models.get("cs").get(MT_TOKENIZER)),
-            Arguments.of("da", 
DownloadUtil.available_models.get("da").get(MT_TOKENIZER)),
-            Arguments.of("el", 
DownloadUtil.available_models.get("el").get(MT_TOKENIZER)),
-            Arguments.of("es", 
DownloadUtil.available_models.get("es").get(MT_TOKENIZER)),
-            Arguments.of("et", 
DownloadUtil.available_models.get("et").get(MT_TOKENIZER)),
-            Arguments.of("eu", 
DownloadUtil.available_models.get("eu").get(MT_TOKENIZER)),
-            Arguments.of("fi", 
DownloadUtil.available_models.get("fi").get(MT_TOKENIZER)),
-            Arguments.of("hr", 
DownloadUtil.available_models.get("hr").get(MT_TOKENIZER)),
-            Arguments.of("hy", 
DownloadUtil.available_models.get("hy").get(MT_TOKENIZER)),
-            Arguments.of("is", 
DownloadUtil.available_models.get("is").get(MT_TOKENIZER)),
-            Arguments.of("ka", 
DownloadUtil.available_models.get("ka").get(MT_TOKENIZER)),
-            Arguments.of("kk", 
DownloadUtil.available_models.get("kk").get(MT_TOKENIZER)),
-            Arguments.of("ko", 
DownloadUtil.available_models.get("ko").get(MT_TOKENIZER)),
-            Arguments.of("lv", 
DownloadUtil.available_models.get("lv").get(MT_TOKENIZER)),
-            Arguments.of("no", 
DownloadUtil.available_models.get("no").get(MT_TOKENIZER)),
-            Arguments.of("pl", 
DownloadUtil.available_models.get("pl").get(MT_TOKENIZER)),
-            Arguments.of("pt", 
DownloadUtil.available_models.get("pt").get(MT_TOKENIZER)),
-            Arguments.of("ro", 
DownloadUtil.available_models.get("ro").get(MT_TOKENIZER)),
-            Arguments.of("ru", 
DownloadUtil.available_models.get("ru").get(MT_TOKENIZER)),
-            Arguments.of("sk", 
DownloadUtil.available_models.get("sk").get(MT_TOKENIZER)),
-            Arguments.of("sl", 
DownloadUtil.available_models.get("sl").get(MT_TOKENIZER)),
-            Arguments.of("sr", 
DownloadUtil.available_models.get("sr").get(MT_TOKENIZER)),
-            Arguments.of("sv", 
DownloadUtil.available_models.get("sv").get(MT_TOKENIZER)),
-            Arguments.of("tr", 
DownloadUtil.available_models.get("tr").get(MT_TOKENIZER)),
-            Arguments.of("uk", 
DownloadUtil.available_models.get("uk").get(MT_TOKENIZER))
+            Arguments.of("en", 
DownloadUtil.getAvailableModels().get("en").get(MT_TOKENIZER)),
+            Arguments.of("fr", 
DownloadUtil.getAvailableModels().get("fr").get(MT_TOKENIZER)),
+            Arguments.of("de", 
DownloadUtil.getAvailableModels().get("de").get(MT_TOKENIZER)),
+            Arguments.of("it", 
DownloadUtil.getAvailableModels().get("it").get(MT_TOKENIZER)),
+            Arguments.of("nl", 
DownloadUtil.getAvailableModels().get("nl").get(MT_TOKENIZER)),
+            Arguments.of("bg", 
DownloadUtil.getAvailableModels().get("bg").get(MT_TOKENIZER)),
+            Arguments.of("ca", 
DownloadUtil.getAvailableModels().get("ca").get(MT_TOKENIZER)),
+            Arguments.of("cs", 
DownloadUtil.getAvailableModels().get("cs").get(MT_TOKENIZER)),
+            Arguments.of("da", 
DownloadUtil.getAvailableModels().get("da").get(MT_TOKENIZER)),
+            Arguments.of("el", 
DownloadUtil.getAvailableModels().get("el").get(MT_TOKENIZER)),
+            Arguments.of("es", 
DownloadUtil.getAvailableModels().get("es").get(MT_TOKENIZER)),
+            Arguments.of("et", 
DownloadUtil.getAvailableModels().get("et").get(MT_TOKENIZER)),
+            Arguments.of("eu", 
DownloadUtil.getAvailableModels().get("eu").get(MT_TOKENIZER)),
+            Arguments.of("fi", 
DownloadUtil.getAvailableModels().get("fi").get(MT_TOKENIZER)),
+            Arguments.of("hr", 
DownloadUtil.getAvailableModels().get("hr").get(MT_TOKENIZER)),
+            Arguments.of("hy", 
DownloadUtil.getAvailableModels().get("hy").get(MT_TOKENIZER)),
+            Arguments.of("is", 
DownloadUtil.getAvailableModels().get("is").get(MT_TOKENIZER)),
+            Arguments.of("ka", 
DownloadUtil.getAvailableModels().get("ka").get(MT_TOKENIZER)),
+            Arguments.of("kk", 
DownloadUtil.getAvailableModels().get("kk").get(MT_TOKENIZER)),
+            Arguments.of("ko", 
DownloadUtil.getAvailableModels().get("ko").get(MT_TOKENIZER)),
+            Arguments.of("lv", 
DownloadUtil.getAvailableModels().get("lv").get(MT_TOKENIZER)),
+            Arguments.of("no", 
DownloadUtil.getAvailableModels().get("no").get(MT_TOKENIZER)),
+            Arguments.of("pl", 
DownloadUtil.getAvailableModels().get("pl").get(MT_TOKENIZER)),
+            Arguments.of("pt", 
DownloadUtil.getAvailableModels().get("pt").get(MT_TOKENIZER)),
+            Arguments.of("ro", 
DownloadUtil.getAvailableModels().get("ro").get(MT_TOKENIZER)),
+            Arguments.of("ru", 
DownloadUtil.getAvailableModels().get("ru").get(MT_TOKENIZER)),
+            Arguments.of("sk", 
DownloadUtil.getAvailableModels().get("sk").get(MT_TOKENIZER)),
+            Arguments.of("sl", 
DownloadUtil.getAvailableModels().get("sl").get(MT_TOKENIZER)),
+            Arguments.of("sr", 
DownloadUtil.getAvailableModels().get("sr").get(MT_TOKENIZER)),
+            Arguments.of("sv", 
DownloadUtil.getAvailableModels().get("sv").get(MT_TOKENIZER)),
+            Arguments.of("tr", 
DownloadUtil.getAvailableModels().get("tr").get(MT_TOKENIZER)),
+            Arguments.of("uk", 
DownloadUtil.getAvailableModels().get("uk").get(MT_TOKENIZER))
     );
   }
 }

Reply via email to