This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch OPENNLP-1829-Transfer-DirectoryModelFinder-to-core
in repository https://gitbox.apache.org/repos/asf/opennlp.git

commit bd45c1d9235e6ab776bd5d2a925e94b9c47332d1
Author: Martin Wiesner <[email protected]>
AuthorDate: Tue May 12 10:02:41 2026 +0200

    OPENNLP-1829: Transfer DirectoryModelFinder to OpenNLP core
---
 .../tools/models/AbstractClassPathModelFinder.java |  70 ++++++++++
 .../tools/models/dir/DirectoryModelFinder.java     | 145 +++++++++++++++++++++
 .../models/simple/SimpleClassPathModelFinder.java  |  59 ---------
 3 files changed, 215 insertions(+), 59 deletions(-)

diff --git a/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java b/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java
index b31128dd8..2050892c8 100644
--- a/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java
+++ b/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java
@@ -16,12 +16,22 @@
  */
 package opennlp.tools.models;
 
+import java.io.IOException;
+import java.net.JarURLConnection;
 import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Enumeration;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+import java.util.regex.Pattern;
 
 /**
  * A base implementation of a {@link ClassPathModelFinder} for the detection of
@@ -126,4 +136,64 @@ public abstract class AbstractClassPathModelFinder 
implements ClassPathModelFind
     return jarModelPrefix;
   }
 
+  /**
+   * Translates a {@code wildcard} expression into an equivalent Java regular expression.
+   * <p>
+   * {@code *} is translated to {@code .*} and {@code ?} to {@code .}. Any other character
+   * that has a special meaning in a regular expression is escaped with a backslash,
+   * so that it is matched literally.
+   *
+   * @param wildcard A valid expression. It must not be {@code null}.
+   * @return The escaped regex.
+   */
+  protected String asRegex(String wildcard) {
+    final StringBuilder regex = new StringBuilder(wildcard.length() + 8);
+    for (int i = 0; i < wildcard.length(); i++) {
+      final char c = wildcard.charAt(i);
+      if (c == '*') {
+        regex.append(".*");
+      } else if (c == '?') {
+        regex.append('.');
+      } else {
+        // Escape regex meta characters; otherwise names such as "model+v1(x).bin"
+        // would either not match themselves or fail to compile as a pattern.
+        if ("\\.[]{}()+^$|".indexOf(c) >= 0) {
+          regex.append('\\');
+        }
+        regex.append(c);
+      }
+    }
+    return regex.toString();
+  }
+
+  /**
+   * Checks whether the file part of a {@link URL} matches a {@link Pattern} as a whole.
+   *
+   * @param url     The {@link URL} whose {@link URL#getFile() file part} is tested.
+   * @param pattern The {@link Pattern} to test against.
+   * @return {@code true} if the entire file part matches, {@code false} otherwise.
+   */
+  protected boolean matchesPattern(URL url, Pattern pattern) {
+    final String file = url.getFile();
+    return pattern.matcher(file).matches();
+  }
+
+  /**
+   * Converts a {@code location} in String form to a {@link URL}.
+   *
+   * @param location The resource path and/or reference. It must denote an absolute URI.
+   * @return The converted {@link URL} form.
+   * @throws IOException Thrown if {@code location} is not a syntactically valid, absolute URI
+   *                     or if it cannot be turned into a {@link URL}.
+   */
+  protected static URL toURL(String location) throws IOException {
+    try {
+      return new URI(location).toURL();
+    } catch (URISyntaxException | IllegalArgumentException e) {
+      // URI#toURL() rejects non-absolute URIs with an unchecked IllegalArgumentException;
+      // report it like any other conversion problem so callers only deal with IOException.
+      throw new IOException(e);
+    }
+  }
+
+  /**
+   * Lists all non-directory entries of a jar file as {@link URI URIs} below the root of that jar.
+   *
+   * @param fileUrl   The {@link URL} of the jar file to inspect.
+   * @param isWindows {@code true} if backslashes in {@code fileUrl} shall be replaced by
+   *                  forward slashes, {@code false} otherwise.
+   * @return The {@link URI URIs} of the entries. Entries whose name cannot be expressed
+   *         as a {@link URI} are skipped.
+   * @throws IOException Thrown if the jar file could not be opened or read.
+   */
+  protected List<URI> getURIsFromJar(URL fileUrl, boolean isWindows) throws IOException {
+    final String file = isWindows ? fileUrl.toString().replace("\\", "/") : fileUrl.toString();
+    final URL jarUrl = toURL(JAR + ":" + file + "!/");
+    final JarURLConnection jarConnection = (JarURLConnection) jarUrl.openConnection();
+    // With caching enabled, getJarFile() may hand out a JarFile instance that is shared with
+    // other users of the same jar URL; closing it below would then close it for them as well.
+    jarConnection.setUseCaches(false);
+
+    final List<URI> uris = new ArrayList<>();
+    try (JarFile jarFile = jarConnection.getJarFile()) {
+      final Enumeration<JarEntry> entries = jarFile.entries();
+      while (entries.hasMoreElements()) {
+        final JarEntry entry = entries.nextElement();
+        if (!entry.isDirectory()) {
+          try {
+            uris.add(new URI(jarUrl + entry.getName()));
+          } catch (URISyntaxException ignored) {
+            //if we cannot convert to URI here, we ignore that entry.
+          }
+        }
+      }
+    }
+
+    return uris;
+  }
+
+  /**
+   * Checks the {@code os.name} system property to find out whether the JVM runs on Windows.
+   *
+   * @return {@code true} if the operating system name starts with {@code "windows"}
+   *         (ignoring case), {@code false} otherwise or if the property is not set.
+   */
+  protected boolean isWindows() {
+    // A plain contains("win") check would also hit names such as "Darwin".
+    return System.getProperty("os.name", "unknown").toLowerCase(Locale.ROOT).startsWith("windows");
+  }
+
 }
diff --git a/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/dir/DirectoryModelFinder.java b/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/dir/DirectoryModelFinder.java
new file mode 100644
index 000000000..e31c7552c
--- /dev/null
+++ b/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/dir/DirectoryModelFinder.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package opennlp.tools.models.dir;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+
+import org.slf4j.LoggerFactory;
+
+import opennlp.tools.models.AbstractClassPathModelFinder;
+import opennlp.tools.models.ClassPathModelFinder;
+
+/**
+ * The {@code DirectoryModelFinder} class is responsible for finding model files
+ * inside the JAR files of a given directory.
+ *
+ * <p>The regular files below the directory are collected, optionally descending into
+ * subdirectories. Every file whose location matches the {@code jarModelPrefix} wildcard is
+ * opened as a JAR archive and its entries are matched against the requested wildcard pattern.
+ *
+ * <p><b>Usage:</b>
+ * <ul>
+ *   <li>Provide the prefix for models to be found in JAR files using the {@code jarModelPrefix} parameter.</li>
+ *   <li>Specify the directory to search and whether to enable recursive scanning.</li>
+ * </ul>
+ *
+ * @see AbstractClassPathModelFinder
+ * @see ClassPathModelFinder
+ */
+public class DirectoryModelFinder extends AbstractClassPathModelFinder implements ClassPathModelFinder {
+
+  private static final org.slf4j.Logger logger = LoggerFactory.getLogger(DirectoryModelFinder.class);
+
+  private final Path directory;
+  private final boolean recursive;
+  private final Pattern jarPattern;
+  // The most recently compiled file pattern and the regex it was compiled from.
+  // Access to this pair is not synchronized.
+  private Pattern filePattern;
+  private String prevFilePattern;
+
+  /**
+   * Instantiates a new {@link DirectoryModelFinder} with the specified parameters.
+   *
+   * @param jarModelPrefix The prefix for identifying model files in JAR archives; may be {@code null}.
+   *                       If it is {@code null}, {@link ClassPathModelFinder#OPENNLP_MODEL_JAR_PREFIX} is used.
+   * @param directory      The root directory to scan from for model files; must not be {@code null}.
+   * @param recursive      {@code true} if the search should include subdirectories, {@code false} otherwise.
+   * @throws IllegalArgumentException Thrown if {@code directory} is {@code null}.
+   */
+  public DirectoryModelFinder(String jarModelPrefix, Path directory, boolean recursive) {
+    super(jarModelPrefix == null ? OPENNLP_MODEL_JAR_PREFIX : jarModelPrefix);
+    if (directory == null) {
+      throw new IllegalArgumentException("Given directory must not be NULL");
+    }
+    this.directory = directory;
+    this.recursive = recursive;
+    this.jarPattern = Pattern.compile(asRegex("*" + getJarModelPrefix()));
+  }
+
+  /**
+   * @return Always {@code null} as it is not needed for the directory case.
+   */
+  @Override
+  protected Object getContext() {
+    return null; //not needed for the directory case. Just return NULL.
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Only entries of JAR files that match the jar model prefix are considered. A JAR file
+   * that cannot be read is logged and skipped.
+   *
+   * @return The matching {@link URI URIs}; empty if {@code wildcardPattern} is {@code null}.
+   */
+  @Override
+  protected List<URI> getMatchingURIs(String wildcardPattern, Object context) {
+    if (wildcardPattern == null) {
+      return Collections.emptyList();
+    }
+
+    final boolean isWindows = isWindows();
+    final List<URL> cp = getDirectoryContent();
+    final List<URI> cpu = new ArrayList<>();
+    final String filePatternString = asRegex("*" + wildcardPattern);
+    // Re-compile only if the requested pattern differs from the previous request.
+    if (!filePatternString.equals(prevFilePattern)) {
+      this.filePattern = Pattern.compile(filePatternString);
+      this.prevFilePattern = filePatternString;
+    }
+
+    for (URL url : cp) {
+      if (matchesPattern(url, jarPattern)) {
+        try {
+          for (URI u : getURIsFromJar(url, isWindows)) {
+            if (matchesPattern(u.toURL(), filePattern)) {
+              cpu.add(u);
+            }
+          }
+        } catch (IOException e) {
+          logger.warn("Cannot read content of {}.", url, e);
+        }
+      }
+    }
+
+    return cpu;
+  }
+
+  /**
+   * Collects the regular files of the configured directory, descending into
+   * subdirectories only if {@code recursive} is set.
+   *
+   * @return The {@link URL URLs} of the files found so far; traversal problems are
+   *         logged and end the scan instead of being propagated.
+   */
+  private List<URL> getDirectoryContent() {
+    final List<URL> fileList = new ArrayList<>();
+    try (Stream<Path> files = Files.walk(directory, recursive ? Integer.MAX_VALUE : 1)) {
+      files.filter(Files::isRegularFile).forEach(path -> {
+        try {
+          fileList.add(path.toUri().toURL());
+        } catch (MalformedURLException e) {
+          // A single file that cannot be expressed as URL must not abort the scan.
+          logger.debug("Skipping {} as it cannot be converted to a URL.", path, e);
+        }
+      });
+    } catch (IOException | java.io.UncheckedIOException e) {
+      // Files.walk(..) reports problems that occur during the lazy traversal
+      // as UncheckedIOException, not as IOException.
+      logger.warn(e.getLocalizedMessage(), e);
+    }
+    return fileList;
+  }
+
+}
diff --git a/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/simple/SimpleClassPathModelFinder.java b/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/simple/SimpleClassPathModelFinder.java
index 44bf4d61a..10b83f938 100644
--- a/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/simple/SimpleClassPathModelFinder.java
+++ b/opennlp-core/opennlp-model-resolver/src/main/java/opennlp/tools/models/simple/SimpleClassPathModelFinder.java
@@ -19,21 +19,15 @@ package opennlp.tools.models.simple;
 import java.io.IOException;
 import java.lang.reflect.Field;
 import java.lang.reflect.Method;
-import java.net.JarURLConnection;
 import java.net.MalformedURLException;
 import java.net.URI;
-import java.net.URISyntaxException;
 import java.net.URL;
 import java.net.URLClassLoader;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
-import java.util.Enumeration;
 import java.util.List;
-import java.util.Locale;
-import java.util.jar.JarEntry;
-import java.util.jar.JarFile;
 import java.util.regex.Pattern;
 
 import org.slf4j.Logger;
@@ -131,59 +125,6 @@ public class SimpleClassPathModelFinder extends 
AbstractClassPathModelFinder imp
     return cpu;
   }
 
-  /**
-   * Escapes a {@code wildcard} expressions for usage as a Java regular 
expression.
-   *
-   * @param wildcard A valid expression. It must not be {@code null}.
-   * @return The escaped regex.
-   */
-  private String asRegex(String wildcard) {
-    return wildcard
-        .replace(".", "\\.")
-        .replace("*", ".*")
-        .replace("?", ".");
-  }
-
-  private boolean matchesPattern(URL url, Pattern pattern) {
-    return pattern.matcher(url.getFile()).matches();
-  }
-
-  private static URL toURL(String location) throws IOException {
-    try {
-      return new URI(location).toURL();
-    } catch (URISyntaxException e) {
-      throw new IOException(e);
-    }
-  }
-
-  private List<URI> getURIsFromJar(URL fileUrl, boolean isWindows) throws 
IOException {
-    final List<URI> uris = new ArrayList<>();
-    final String location = JAR + ":" +
-        (isWindows ? fileUrl.toString().replace("\\", "/")
-            : fileUrl.toString()) + "!/";
-    final URL jarUrl = toURL(location);
-    final JarURLConnection jarConnection = (JarURLConnection) 
jarUrl.openConnection();
-    try (JarFile jarFile = jarConnection.getJarFile()) {
-      final Enumeration<JarEntry> entries = jarFile.entries();
-      while (entries.hasMoreElements()) {
-        final JarEntry entry = entries.nextElement();
-        if (!entry.isDirectory()) {
-          try {
-            uris.add(new URI(jarUrl + entry.getName()));
-          } catch (URISyntaxException ignored) {
-            //if we cannot convert to URI here, we ignore that entry.
-          }
-        }
-      }
-    }
-
-    return uris;
-  }
-
-  private boolean isWindows() {
-    return System.getProperty("os.name", 
"unknown").toLowerCase(Locale.ROOT).contains("win");
-  }
-
   /**
    * Attempts to obtain {@link URL URLs} from the classpath in the following 
order:
    * <p>

Reply via email to