This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch opennlp-2.x
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/opennlp-2.x by this push:
     new 95f401d44 [2.x] OPENNLP-1829: Transfer DirectoryModelFinder to OpenNLP core (#1054)
95f401d44 is described below

commit 95f401d44dce4199ebb53f1a7fb9f46ab489031c
Author: Martin Wiesner <[email protected]>
AuthorDate: Mon May 18 10:27:02 2026 +0200

    [2.x] OPENNLP-1829: Transfer DirectoryModelFinder to OpenNLP core (#1054)
---
 .../tools/models/AbstractClassPathModelFinder.java |  70 ++++++++++
 .../tools/models/dir/DirectoryModelFinder.java     | 145 +++++++++++++++++++++
 .../models/simple/SimpleClassPathModelFinder.java  |  61 +--------
 3 files changed, 220 insertions(+), 56 deletions(-)

diff --git a/opennlp-tools-models/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java b/opennlp-tools-models/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java
index b31128dd8..2050892c8 100644
--- a/opennlp-tools-models/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java
+++ b/opennlp-tools-models/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java
@@ -16,12 +16,22 @@
  */
 package opennlp.tools.models;
 
+import java.io.IOException;
+import java.net.JarURLConnection;
 import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Enumeration;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+import java.util.regex.Pattern;
 
 /**
  * A base implementation of a {@link ClassPathModelFinder} for the detection of
@@ -126,4 +136,64 @@ public abstract class AbstractClassPathModelFinder implements ClassPathModelFind
     return jarModelPrefix;
   }
 
+  /**
+   * Escapes a {@code wildcard} expressions for usage as a Java regular expression.
+   *
+   * @param wildcard A valid expression. It must not be {@code null}.
+   * @return The escaped regex.
+   */
+  protected String asRegex(String wildcard) {
+    return wildcard
+        .replace(".", "\\.")
+        .replace("*", ".*")
+        .replace("?", ".");
+  }
+
+  protected boolean matchesPattern(URL url, Pattern pattern) {
+    return pattern.matcher(url.getFile()).matches();
+  }
+
+  /**
+   * Converts a {@code location} in String form to a {@link URL}.
+   *
+   * @param location The resource path and/or reference.
+   * @return The converted {@link URL} form.
+   * @throws IOException Thrown if IO errors occurred during conversion
+   */
+  protected static URL toURL(String location) throws IOException {
+    try {
+      return new URI(location).toURL();
+    } catch (URISyntaxException e) {
+      throw new IOException(e);
+    }
+  }
+
+  protected List<URI> getURIsFromJar(URL fileUrl, boolean isWindows) throws IOException {
+    final List<URI> uris = new ArrayList<>();
+    final String location = JAR + ":" +
+        (isWindows ? fileUrl.toString().replace("\\", "/")
+            : fileUrl.toString()) + "!/";
+    final URL jarUrl = toURL(location);
+    final JarURLConnection jarConnection = (JarURLConnection) jarUrl.openConnection();
+    try (JarFile jarFile = jarConnection.getJarFile()) {
+      final Enumeration<JarEntry> entries = jarFile.entries();
+      while (entries.hasMoreElements()) {
+        final JarEntry entry = entries.nextElement();
+        if (!entry.isDirectory()) {
+          try {
+            uris.add(new URI(jarUrl + entry.getName()));
+          } catch (URISyntaxException ignored) {
+            //if we cannot convert to URI here, we ignore that entry.
+          }
+        }
+      }
+    }
+
+    return uris;
+  }
+
+  protected boolean isWindows() {
+    return System.getProperty("os.name", "unknown").toLowerCase(Locale.ROOT).contains("win");
+  }
+
 }
diff --git a/opennlp-tools-models/src/main/java/opennlp/tools/models/dir/DirectoryModelFinder.java b/opennlp-tools-models/src/main/java/opennlp/tools/models/dir/DirectoryModelFinder.java
new file mode 100644
index 000000000..9a7b3d1d6
--- /dev/null
+++ b/opennlp-tools-models/src/main/java/opennlp/tools/models/dir/DirectoryModelFinder.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.models.dir;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+
+import org.slf4j.LoggerFactory;
+
+import opennlp.tools.models.AbstractClassPathModelFinder;
+import opennlp.tools.models.ClassPathModelFinder;
+
+/**
+ * The {@code DirectoryModelFinder} class is responsible for finding model files in a given directory
+ * on the classpath.
+ *
+ * <p>This class allows searching for models based on wildcard patterns, either in plain directory structures
+ * or within JAR files. The search can be performed recursively depending on the specified configuration.
+ *
+ * <p><b>Usage:</b>
+ * <ul>
+ *   <li>Provide the prefix for models to be found in JAR files using the {@code jarModelPrefix}
+ *   parameter.</li>
+ *   <li>Specify the directory to search and whether to enable recursive scanning.</li>
+ *   <li>The class supports resolving both direct file matches and entries within JAR archives.</li>
+ * </ul>
+ *
+ * @see AbstractClassPathModelFinder
+ * @see ClassPathModelFinder
+ */
+public class DirectoryModelFinder extends AbstractClassPathModelFinder implements ClassPathModelFinder {
+
+  private static final org.slf4j.Logger logger = LoggerFactory.getLogger(DirectoryModelFinder.class);
+
+  private final Path directory;
+  private final boolean recursive;
+  private final Pattern jarPattern;
+  private Pattern filePattern;
+  private String prevFilePattern;
+
+  /**
+   * Instantiates a new {@link DirectoryModelFinder} with the specified parameters.
+   *
+   * @param jarModelPrefix The prefix for identifying model files in JAR archives; may be {@code null}.
+   *                       If it is {@code null}, {@link ClassPathModelFinder#OPENNLP_MODEL_JAR_PREFIX}
+   *                       is used.
+   * @param directory      The root directory to scan from for model files; must not be {@code null}.
+   * @param recursive      {@code true} if the search should include subdirectories, {@code false} otherwise.
+   * @throws IllegalArgumentException Thrown if {@code directory} is {@code null}.
+   */
+  public DirectoryModelFinder(String jarModelPrefix, Path directory, boolean recursive) {
+    super(jarModelPrefix == null ? OPENNLP_MODEL_JAR_PREFIX : jarModelPrefix);
+    if (directory == null) {
+      throw new IllegalArgumentException("Given directory must not be NULL");
+    }
+    this.directory = directory;
+    this.recursive = recursive;
+    this.jarPattern = Pattern.compile(asRegex("*" + getJarModelPrefix()));
+  }
+
+  /**
+   * @return Always {@code null} as it is not needed for the directory case.
+   */
+  @Override
+  protected Object getContext() {
+    return null; //not needed for the simple case. Just return NULL.
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  protected List<URI> getMatchingURIs(String wildcardPattern, Object context) {
+    if (wildcardPattern == null) {
+      return Collections.emptyList();
+    }
+
+    final boolean isWindows = isWindows();
+    final List<URL> cp = getDirectoryContent();
+    final List<URI> cpu = new ArrayList<>();
+    final String filePatternString = asRegex("*" + wildcardPattern);
+    if (!filePatternString.equals(prevFilePattern)) {
+      this.filePattern = Pattern.compile(filePatternString);
+      this.prevFilePattern = filePatternString;
+    }
+
+    for (URL url : cp) {
+      if (matchesPattern(url, jarPattern)) {
+        try {
+          for (URI u : getURIsFromJar(url, isWindows)) {
+            if (matchesPattern(u.toURL(), filePattern)) {
+              cpu.add(u);
+            }
+          }
+        } catch (IOException e) {
+          logger.warn("Cannot read content of {}.", url, e);
+        }
+      }
+    }
+
+    return cpu;
+  }
+
+  private List<URL> getDirectoryContent() {
+    final List<URL> fileList = new ArrayList<>();
+    try (Stream<Path> files = Files.walk(directory, recursive ? Integer.MAX_VALUE : 1)) {
+      files.filter(Files::isRegularFile).forEach(path -> {
+        try {
+          fileList.add(path.toUri().toURL());
+        } catch (MalformedURLException ignored) {
+
+        }
+      });
+    } catch (IOException e) {
+      logger.warn(e.getLocalizedMessage(), e);
+    }
+    return fileList;
+  }
+
+
+
+}
diff --git a/opennlp-tools-models/src/main/java/opennlp/tools/models/simple/SimpleClassPathModelFinder.java b/opennlp-tools-models/src/main/java/opennlp/tools/models/simple/SimpleClassPathModelFinder.java
index ebc7da9f0..e189cef8d 100644
--- a/opennlp-tools-models/src/main/java/opennlp/tools/models/simple/SimpleClassPathModelFinder.java
+++ b/opennlp-tools-models/src/main/java/opennlp/tools/models/simple/SimpleClassPathModelFinder.java
@@ -19,20 +19,15 @@ package opennlp.tools.models.simple;
 import java.io.IOException;
 import java.lang.reflect.Field;
 import java.lang.reflect.Method;
-import java.net.JarURLConnection;
 import java.net.MalformedURLException;
 import java.net.URI;
-import java.net.URISyntaxException;
 import java.net.URL;
 import java.net.URLClassLoader;
+import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
-import java.util.Enumeration;
 import java.util.List;
-import java.util.Locale;
-import java.util.jar.JarEntry;
-import java.util.jar.JarFile;
 import java.util.regex.Pattern;
 
 import org.slf4j.Logger;
@@ -45,7 +40,7 @@ import opennlp.tools.models.ClassPathModelFinder;
  * Enables the detection of OpenNLP models in the classpath via JDK classes
  * By default, this class will search for JAR files starting with "opennlp-models-*".
  * This wildcard pattern can be adjusted by using the alternative constructor of this class.
- *   
+ *
  * @implNote
  * It is a rather simple implementation of scanning the classpath by trying to obtain {@link URL urls}
  * from the actual classpath via a chain of possible options. It might not work for every use-case
@@ -68,7 +63,6 @@ import opennlp.tools.models.ClassPathModelFinder;
 public class SimpleClassPathModelFinder extends AbstractClassPathModelFinder implements ClassPathModelFinder {
 
   private static final Logger logger = LoggerFactory.getLogger(SimpleClassPathModelFinder.class);
-  private static final String FILE_PREFIX = "file";
   private static final Pattern CLASSPATH_SEPARATOR_PATTERN_WINDOWS = Pattern.compile(";");
   private static final Pattern CLASSPATH_SEPARATOR_PATTERN_UNIX = Pattern.compile(":");
   // ; for Windows, : for Linux/OSX
@@ -131,51 +125,6 @@ public class SimpleClassPathModelFinder extends AbstractClassPathModelFinder imp
     return cpu;
   }
 
-  /**
-   * Escapes a {@code wildcard} expressions for usage as a Java regular expression.
-   *
-   * @param wildcard A valid expression. It must not be {@code null}.
-   * @return The escaped regex.
-   */
-  private String asRegex(String wildcard) {
-    return wildcard
-        .replace(".", "\\.")
-        .replace("*", ".*")
-        .replace("?", ".");
-  }
-
-  private boolean matchesPattern(URL url, Pattern pattern) {
-    return pattern.matcher(url.getFile()).matches();
-  }
-
-  private List<URI> getURIsFromJar(URL fileUrl, boolean isWindows) throws IOException {
-    final List<URI> uris = new ArrayList<>();
-    final URL jarUrl = new URL(JAR + ":" +
-        (isWindows ? fileUrl.toString().replace("\\", "/")
-            : fileUrl.toString()) + "!/");
-    final JarURLConnection jarConnection = (JarURLConnection) jarUrl.openConnection();
-    try (JarFile jarFile = jarConnection.getJarFile()) {
-      final Enumeration<JarEntry> entries = jarFile.entries();
-      while (entries.hasMoreElements()) {
-        final JarEntry entry = entries.nextElement();
-        if (!entry.isDirectory()) {
-          final URL entryUrl = new URL(jarUrl + entry.getName());
-          try {
-            uris.add(entryUrl.toURI());
-          } catch (URISyntaxException ignored) {
-            //if we cannot convert to URI here, we ignore that entry.
-          }
-        }
-      }
-    }
-
-    return uris;
-  }
-
-  private boolean isWindows() {
-    return System.getProperty("os.name", "unknown").toLowerCase(Locale.ROOT).contains("win");
-  }
-
   /**
    * Attempts to obtain {@link URL URLs} from the classpath in the following order:
    * <p>
@@ -206,12 +155,12 @@ public class SimpleClassPathModelFinder extends AbstractClassPathModelFinder imp
   private List<URL> getClassPathUrlsFromSystemProperty() {
     final String cp = System.getProperty("java.class.path", "");
     final String[] matches = isWindows()
-            ? CLASSPATH_SEPARATOR_PATTERN_WINDOWS.split(cp)
-            : CLASSPATH_SEPARATOR_PATTERN_UNIX.split(cp);
+        ? CLASSPATH_SEPARATOR_PATTERN_WINDOWS.split(cp)
+        : CLASSPATH_SEPARATOR_PATTERN_UNIX.split(cp);
     final List<URL> jarUrls = new ArrayList<>();
     for (String classPath: matches) {
       try {
-        jarUrls.add(new URL(FILE_PREFIX, "", classPath));
+        jarUrls.add(Path.of(classPath).toUri().toURL());
       } catch (MalformedURLException ignored) {
         //if we cannot parse a URL from the system property, just ignore it...
         //we couldn't load it anyway

Reply via email to