This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch 
OPENNLP-1661-Fix-custom-models-being-wiped-from-OpenNLP-user.home-directory
in repository https://gitbox.apache.org/repos/asf/opennlp.git

commit 25fa67aaf9a71ceff6932e095f89f86e5f0d5106
Author: Martin Wiesner <[email protected]>
AuthorDate: Mon Dec 2 20:30:19 2024 +0100

    OPENNLP-1661 Fix custom models being wiped from OpenNLP user.home directory
    - deletes AbstractDownloadUtilTest.java removing historical code that wiped 
models
    - adds package-private DownloadUtil#existsModel(..) method to check models 
for certain language exist locally
    - adds to and adjusts related test classes
---
 .../main/java/opennlp/tools/util/DownloadUtil.java | 50 +++++++++++---
 .../tools/util/AbstractDownloadUtilTest.java       | 79 ----------------------
 .../tools/util/DownloadUtilDownloadTwiceTest.java  | 30 ++++----
 .../java/opennlp/tools/util/DownloadUtilTest.java  | 25 ++++++-
 4 files changed, 81 insertions(+), 103 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java 
b/opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java
index 11b328da..bbfa2426 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java
@@ -25,6 +25,7 @@ import java.net.MalformedURLException;
 import java.net.URL;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.nio.file.StandardCopyOption;
@@ -80,6 +81,41 @@ public class DownloadUtil {
 
   private static Map<String, Map<ModelType, String>> availableModels;
 
+  /**
+   * Checks if a model of the specified {@code modelType} has been downloaded 
already
+   * for a particular {@code language}.
+   *
+   * @param language  The ISO language code of the requested model.
+   * @param modelType The {@link DownloadUtil.ModelType type} of model.
+   * @return {@code true} if a model exists locally, {@code false} otherwise.
+   * @throws IOException Thrown if IO errors occurred or the model is invalid.
+   */
+  static boolean existsModel(String language, ModelType modelType) throws 
IOException {
+    Map<ModelType, String> modelsByLanguage = 
getAvailableModels().get(language);
+    if (modelsByLanguage == null) {
+      return false;
+    } else {
+      final String url = modelsByLanguage.get(modelType);
+      if (url != null) {
+        final Path homeDirectory = 
Paths.get(System.getProperty("OPENNLP_DOWNLOAD_HOME",
+                System.getProperty("user.home"))).resolve(".opennlp");
+        final String filename = url.substring(url.lastIndexOf("/") + 1);
+        final Path localFile = Paths.get(homeDirectory.toString(), filename);
+        boolean exists;
+        try {
+          // if this does not throw the requested model exists AND is valid!
+          validateModel(new URL(url + ".sha512"), localFile);
+          exists = true;
+        } catch (NoSuchFileException e) {
+          exists = false;
+        }
+        return exists;
+      } else {
+        return false;
+      }
+    }
+  }
+
   /**
    * Triggers a download for the specified {@link DownloadUtil.ModelType}.
    *
@@ -94,7 +130,7 @@ public class DownloadUtil {
                                                       Class<T> type) throws 
IOException {
 
     if (getAvailableModels().containsKey(language)) {
-      final String url = (getAvailableModels().get(language).get(modelType));
+      final String url = getAvailableModels().get(language).get(modelType);
       if (url != null) {
         return downloadModel(new URL(url), type);
       }
@@ -134,17 +170,14 @@ public class DownloadUtil {
     final Path localFile = Paths.get(homeDirectory.toString(), filename);
 
     if (!Files.exists(localFile)) {
-      logger.debug("Downloading model from {} to {}.", url, localFile);
+      logger.debug("Downloading model to {}.", localFile);
 
       try (final InputStream in = url.openStream()) {
         Files.copy(in, localFile, StandardCopyOption.REPLACE_EXISTING);
       }
-
       validateModel(new URL(url + ".sha512"), localFile);
-
       logger.debug("Download complete.");
     } else {
-      System.out.println("Model file already exists. Skipping download.");
       logger.debug("Model file '{}' already exists. Skipping download.", 
filename);
     }
 
@@ -167,7 +200,7 @@ public class DownloadUtil {
   }
 
   /**
-   * Validates the downloaded model.
+   * Validates a downloaded model via the specified {@link Path 
downloadedModel path}.
    *
    * @param sha512          the url to get the sha512 hash
    * @param downloadedModel the model file to check
@@ -187,8 +220,8 @@ public class DownloadUtil {
     // Validate SHA512 checksum
     final String actualChecksum = calculateSHA512(downloadedModel);
     if (!actualChecksum.equalsIgnoreCase(expectedChecksum)) {
-      throw new IOException("SHA512 checksum validation failed. Expected: "
-          + expectedChecksum + ", but got: " + actualChecksum);
+      throw new IOException("SHA512 checksum validation failed for " + 
downloadedModel.getFileName() +
+          ". Expected: " + expectedChecksum + ", but got: " + actualChecksum);
     }
   }
 
@@ -198,6 +231,7 @@ public class DownloadUtil {
       try (InputStream fis = Files.newInputStream(file);
            DigestInputStream dis = new DigestInputStream(fis, digest)) {
         byte[] buffer = new byte[4096];
+        //noinspection StatementWithEmptyBody
         while (dis.read(buffer) != -1) {
           // Reading the file to update the digest
         }
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/util/AbstractDownloadUtilTest.java 
b/opennlp-tools/src/test/java/opennlp/tools/util/AbstractDownloadUtilTest.java
deleted file mode 100644
index 9fdde8b6..00000000
--- 
a/opennlp-tools/src/test/java/opennlp/tools/util/AbstractDownloadUtilTest.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.util;
-
-import java.io.IOException;
-import java.net.InetSocketAddress;
-import java.net.Socket;
-import java.nio.file.DirectoryStream;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-
-import org.junit.jupiter.api.BeforeAll;
-
-import opennlp.tools.EnabledWhenCDNAvailable;
-
-import static org.junit.jupiter.api.Assertions.fail;
-
-@EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org")
-public abstract class AbstractDownloadUtilTest {
-
-  private static final String APACHE_CDN = "dlcdn.apache.org";
-
-  @BeforeAll
-  public static void cleanupWhenOnline() {
-    boolean isOnline;
-    try (Socket socket = new Socket()) {
-      socket.connect(new InetSocketAddress(APACHE_CDN, 80), 
EnabledWhenCDNAvailable.TIMEOUT_MS);
-      isOnline = true;
-    } catch (IOException e) {
-      // Unreachable, unresolvable or timeout
-      isOnline = false;
-    }
-    // If CDN is available -> go cleanup in preparation of the actual tests
-    if (isOnline) {
-      wipeExistingModelFiles("-tokens-");
-      wipeExistingModelFiles("-sentence-");
-      wipeExistingModelFiles("-pos-");
-      wipeExistingModelFiles("-lemmas-");
-    }
-  }
-
-
-  /*
-   * Helper method that wipes out mode files if they exist on the text 
execution env.
-   * Those model files are wiped from a hidden '.opennlp' subdirectory.
-   *
-   * Thereby, a clean download can be guaranteed - ín CDN is available and 
test are executed.
-   */
-  private static void wipeExistingModelFiles(final String fragment) {
-    final Path dir = Paths.get(System.getProperty("OPENNLP_DOWNLOAD_HOME",
-        System.getProperty("user.home"))).resolve(".opennlp");
-    if (Files.exists(dir)) {
-      try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir, 
"*opennlp-*" + fragment + "*")) {
-        for (Path modelFileToWipe : stream) {
-          Files.deleteIfExists(modelFileToWipe);
-        }
-      } catch (IOException e) {
-        fail(e.getLocalizedMessage());
-      }
-    }
-  }
-
-}
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilDownloadTwiceTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilDownloadTwiceTest.java
index 5f328b00..24bad516 100644
--- 
a/opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilDownloadTwiceTest.java
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilDownloadTwiceTest.java
@@ -35,7 +35,7 @@ import opennlp.tools.sentdetect.SentenceModel;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
 @EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org")
-public class DownloadUtilDownloadTwiceTest extends AbstractDownloadUtilTest {
+public class DownloadUtilDownloadTwiceTest {
 
   /*
    * Programmatic change to debug log to ensure that we can see log messages to
@@ -60,24 +60,24 @@ public class DownloadUtilDownloadTwiceTest extends 
AbstractDownloadUtilTest {
 
   @Test
   public void testDownloadModelTwice() throws IOException {
+    String lang = "de";
+    DownloadUtil.ModelType type = DownloadUtil.ModelType.SENTENCE_DETECTOR;
+    
     try (LogCaptor logCaptor = LogCaptor.forClass(DownloadUtil.class)) {
-
-      DownloadUtil.downloadModel("de",
-          DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class);
-
-      assertEquals(2, logCaptor.getDebugLogs().size());
-      checkDebugLogsContainMessageFragment(logCaptor.getDebugLogs(), "Download 
complete.");
+      boolean alreadyDownloaded = DownloadUtil.existsModel(lang, type);
+      DownloadUtil.downloadModel(lang, type, SentenceModel.class);
+
+      if (! alreadyDownloaded) {
+        assertEquals(2, logCaptor.getDebugLogs().size());
+        checkDebugLogsContainMessageFragment(logCaptor.getDebugLogs(), 
"Download complete.");
+      } else {
+        assertEquals(1, logCaptor.getDebugLogs().size());
+        checkDebugLogsContainMessageFragment(logCaptor.getDebugLogs(), 
"already exists. Skipping download.");
+      }
       logCaptor.clearLogs();
 
       // try to download again
-      DownloadUtil.downloadModel("de",
-          DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class);
-      assertEquals(1, logCaptor.getDebugLogs().size());
-      checkDebugLogsContainMessageFragment(logCaptor.getDebugLogs(), "already 
exists. Skipping download.");
-      logCaptor.clearLogs();
-
-      DownloadUtil.downloadModel("de",
-          DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class);
+      DownloadUtil.downloadModel(lang, type, SentenceModel.class);
       assertEquals(1, logCaptor.getDebugLogs().size());
       checkDebugLogsContainMessageFragment(logCaptor.getDebugLogs(), "already 
exists. Skipping download.");
       logCaptor.clearLogs();
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilTest.java 
b/opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilTest.java
index b8a61f91..8c3a67c1 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilTest.java
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.net.URL;
 import java.util.stream.Stream;
 
+import org.junit.jupiter.api.Test;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
@@ -32,11 +33,12 @@ import opennlp.tools.sentdetect.SentenceModel;
 import opennlp.tools.tokenize.TokenizerModel;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
-public class DownloadUtilTest extends AbstractDownloadUtilTest {
+public class DownloadUtilTest {
 
   @ParameterizedTest(name = "Verify \"{0}\" sentence model")
   @ValueSource(strings = {"en", "fr", "de", "it", "nl", "bg", "ca", "cs", 
"da", "el",
@@ -62,6 +64,27 @@ public class DownloadUtilTest extends 
AbstractDownloadUtilTest {
     assertTrue(model.isLoadedFromSerialized());
   }
 
+  @Test
+  @EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org")
+  public void testExistsModel() throws IOException {
+    final String lang = "en";
+    final DownloadUtil.ModelType type = 
DownloadUtil.ModelType.SENTENCE_DETECTOR;
+    // Prepare
+    SentenceModel model = DownloadUtil.downloadModel(lang, type, 
SentenceModel.class);
+    assertNotNull(model);
+    assertEquals(lang, model.getLanguage());
+    // Test
+    assertTrue(DownloadUtil.existsModel(lang, type));
+  }
+
+  @ParameterizedTest
+  @NullAndEmptySource
+  @ValueSource(strings = {"xy", "\t", "\n"})
+  @EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org")
+  public void testExistsModelInvalid(String input) throws IOException {
+    assertFalse(DownloadUtil.existsModel(input, 
DownloadUtil.ModelType.SENTENCE_DETECTOR));
+  }
+
   @ParameterizedTest(name = "Detect invalid input: \"{0}\"")
   @NullAndEmptySource
   @ValueSource(strings = {" ", "\t", "\n"})

Reply via email to