This is an automated email from the ASF dual-hosted git repository.

rzo1 pushed a commit to branch opennlp-models-optional-classgraph-wip
in repository https://gitbox.apache.org/repos/asf/opennlp.git

commit 3e0abede175c5e6ad9de656a1b7d3833becc97ac
Author: Richard Zowalla <[email protected]>
AuthorDate: Fri Jun 14 12:15:47 2024 +0200

    wip
---
 ...nder.java => AbstractClassPathModelFinder.java} |  83 +++++++------
 .../opennlp/tools/models/ClassPathModelFinder.java |  95 +-------------
 .../tools/models/ClassgraphModelFinder.java        |  64 ++++++++++
 .../tools/models/SimpleClassPathModelFinder.java   | 136 +++++++++++++++++++++
 .../tools/models/AbstractClassPathModelTest.java   |   2 +-
 .../tools/models/ClassPathModelFinderTest.java     |   4 +-
 ...st.java => SimpleClassPathModelFinderTest.java} |  14 +--
 7 files changed, 253 insertions(+), 145 deletions(-)

diff --git 
a/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassPathModelFinder.java
 
b/opennlp-tools-models/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java
similarity index 58%
copy from 
opennlp-tools-models/src/main/java/opennlp/tools/models/ClassPathModelFinder.java
copy to 
opennlp-tools-models/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java
index 18b527f6..b82207a2 100644
--- 
a/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassPathModelFinder.java
+++ 
b/opennlp-tools-models/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java
@@ -23,78 +23,76 @@ import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
 
-import io.github.classgraph.ClassGraph;
-import io.github.classgraph.ResourceList;
-import io.github.classgraph.ScanResult;
-
-
 /**
  * Enables the detection of OpenNLP models in the classpath. By default, this 
class will search
  * for JAR files starting with "opennlp-models-*". This wildcard pattern can 
be adjusted by
  * using the alternative constructor of this class.
  */
-public class ClassPathModelFinder {
+public abstract class AbstractClassPathModelFinder implements 
ClassPathModelFinder {
 
-  private static final String OPENNLP_MODEL_JAR_PREFIX = 
"opennlp-models-*.jar";
   private static final String JAR = "jar";
+
   private final String jarModelPrefix;
   private Set<ClassPathModelEntry> models;
 
   /**
    * By default, it scans for "opennlp-models-*.jar".
    */
-  public ClassPathModelFinder() {
+  public AbstractClassPathModelFinder() {
     this(OPENNLP_MODEL_JAR_PREFIX);
   }
 
   /**
-   * @param modelJarPrefix The leafnames of the jars that should be canned 
(e.g. "opennlp.jar").
+   * @param jarModelPrefix The leafnames of the jars that should be scanned 
(e.g. "opennlp.jar").
    *                      May contain a wildcard glob ("opennlp-*.jar"). It 
must not be {@code null}.
    */
-  public ClassPathModelFinder(String modelJarPrefix) {
-    Objects.requireNonNull(modelJarPrefix, "modelJarPrefix must not be null");
-    this.jarModelPrefix = modelJarPrefix;
+  public AbstractClassPathModelFinder(String jarModelPrefix) {
+    Objects.requireNonNull(jarModelPrefix, "modelJarPrefix must not be null");
+    this.jarModelPrefix = jarModelPrefix;
   }
 
-  /**
-   * Finds OpenNLP models within the classpath.
-   *
-   * @param reloadCache {@code true}, if the internal cache should explicitly 
be reloaded
-   * @return A Set of {@link ClassPathModelEntry ClassPathModelEntries}. It 
might be empty.
-   */
+  @Override
   public Set<ClassPathModelEntry> findModels(boolean reloadCache) {
 
     if (this.models == null || reloadCache) {
-      try (ScanResult sr = new 
ClassGraph().acceptJars(jarModelPrefix).disableDirScanning().scan()) {
+      final List<URI> classpathModels = getMatchingURIs("*.bin", getContext());
+      final List<URI> classPathProperties = 
getMatchingURIs("model.properties", getContext());
 
-        final List<URI> classpathModels = getResourcesMatchingWildcard(sr, 
"*.bin");
-        final List<URI> classPathProperties = getResourcesMatchingWildcard(sr, 
"model.properties");
+      this.models = new HashSet<>();
 
-        this.models = new HashSet<>();
-
-        for (URI model : classpathModels) {
-          URI m = null;
-          for (URI prop : classPathProperties) {
-            if (jarPathsMatch(model, prop)) {
-              m = prop;
-              break;
-            }
+      for (URI model : classpathModels) {
+        URI m = null;
+        for (URI prop : classPathProperties) {
+          if (jarPathsMatch(model, prop)) {
+            m = prop;
+            break;
           }
-          this.models.add(new ClassPathModelEntry(model, 
Optional.ofNullable(m)));
-
         }
+        this.models.add(new ClassPathModelEntry(model, 
Optional.ofNullable(m)));
+
       }
     }
     return this.models;
   }
 
-  private List<URI> getResourcesMatchingWildcard(final ScanResult sr, final 
String resourceWildcard) {
-    try (final ResourceList resources = 
sr.getResourcesMatchingWildcard(resourceWildcard)) {
-      return resources.getURIs();
-    }
-  }
+  /**
+   * Subclasses can implement this method to provide additional context to
+   * {@link AbstractClassPathModelFinder#getMatchingURIs(String, Object)}.
+   *
+   * @return context information. May be {@code null}.
+   */
+  protected abstract Object getContext();
+
+  /**
+   * Return matching classpath URIs for the given pattern.
+   *
+   * @param wildcardPattern the pattern. Must not be {@code null}.
+   * @param context         an object holding context information. It might be 
{@code null}.
+   * @return a list of matching classpath URIs.
+   */
+  protected abstract List<URI> getMatchingURIs(String wildcardPattern, Object 
context);
 
-  private boolean jarPathsMatch(URI uri1, URI uri2) {
+  protected boolean jarPathsMatch(URI uri1, URI uri2) {
     final String[] parts1 = parseJarURI(uri1);
     final String[] parts2 = parseJarURI(uri2);
 
@@ -105,7 +103,7 @@ public class ClassPathModelFinder {
     return parts1[0].equals(parts2[0]);
   }
 
-  private String[] parseJarURI(URI uri) {
+  protected String[] parseJarURI(URI uri) {
     if (JAR.equals(uri.getScheme())) {
       final String ssp = uri.getSchemeSpecificPart();
       final int separatorIndex = ssp.indexOf("!/");
@@ -118,4 +116,11 @@ public class ClassPathModelFinder {
     return null;
   }
 
+  protected String getJarModelPrefix() {
+    return jarModelPrefix;
+  }
+
+  protected Set<ClassPathModelEntry> getModels() {
+    return models;
+  }
 }
diff --git 
a/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassPathModelFinder.java
 
b/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassPathModelFinder.java
index 18b527f6..5603d325 100644
--- 
a/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassPathModelFinder.java
+++ 
b/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassPathModelFinder.java
@@ -16,45 +16,11 @@
  */
 package opennlp.tools.models;
 
-import java.net.URI;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Objects;
-import java.util.Optional;
 import java.util.Set;
 
-import io.github.classgraph.ClassGraph;
-import io.github.classgraph.ResourceList;
-import io.github.classgraph.ScanResult;
+public interface ClassPathModelFinder {
 
-
-/**
- * Enables the detection of OpenNLP models in the classpath. By default, this 
class will search
- * for JAR files starting with "opennlp-models-*". This wildcard pattern can 
be adjusted by
- * using the alternative constructor of this class.
- */
-public class ClassPathModelFinder {
-
-  private static final String OPENNLP_MODEL_JAR_PREFIX = 
"opennlp-models-*.jar";
-  private static final String JAR = "jar";
-  private final String jarModelPrefix;
-  private Set<ClassPathModelEntry> models;
-
-  /**
-   * By default, it scans for "opennlp-models-*.jar".
-   */
-  public ClassPathModelFinder() {
-    this(OPENNLP_MODEL_JAR_PREFIX);
-  }
-
-  /**
-   * @param modelJarPrefix The leafnames of the jars that should be canned 
(e.g. "opennlp.jar").
-   *                      May contain a wildcard glob ("opennlp-*.jar"). It 
must not be {@code null}.
-   */
-  public ClassPathModelFinder(String modelJarPrefix) {
-    Objects.requireNonNull(modelJarPrefix, "modelJarPrefix must not be null");
-    this.jarModelPrefix = modelJarPrefix;
-  }
+  String OPENNLP_MODEL_JAR_PREFIX = "opennlp-models-*.jar";
 
   /**
    * Finds OpenNLP models within the classpath.
@@ -62,60 +28,5 @@ public class ClassPathModelFinder {
    * @param reloadCache {@code true}, if the internal cache should explicitly 
be reloaded
    * @return A Set of {@link ClassPathModelEntry ClassPathModelEntries}. It 
might be empty.
    */
-  public Set<ClassPathModelEntry> findModels(boolean reloadCache) {
-
-    if (this.models == null || reloadCache) {
-      try (ScanResult sr = new 
ClassGraph().acceptJars(jarModelPrefix).disableDirScanning().scan()) {
-
-        final List<URI> classpathModels = getResourcesMatchingWildcard(sr, 
"*.bin");
-        final List<URI> classPathProperties = getResourcesMatchingWildcard(sr, 
"model.properties");
-
-        this.models = new HashSet<>();
-
-        for (URI model : classpathModels) {
-          URI m = null;
-          for (URI prop : classPathProperties) {
-            if (jarPathsMatch(model, prop)) {
-              m = prop;
-              break;
-            }
-          }
-          this.models.add(new ClassPathModelEntry(model, 
Optional.ofNullable(m)));
-
-        }
-      }
-    }
-    return this.models;
-  }
-
-  private List<URI> getResourcesMatchingWildcard(final ScanResult sr, final 
String resourceWildcard) {
-    try (final ResourceList resources = 
sr.getResourcesMatchingWildcard(resourceWildcard)) {
-      return resources.getURIs();
-    }
-  }
-
-  private boolean jarPathsMatch(URI uri1, URI uri2) {
-    final String[] parts1 = parseJarURI(uri1);
-    final String[] parts2 = parseJarURI(uri2);
-
-    if (parts1 == null || parts2 == null) {
-      return false;
-    }
-
-    return parts1[0].equals(parts2[0]);
-  }
-
-  private String[] parseJarURI(URI uri) {
-    if (JAR.equals(uri.getScheme())) {
-      final String ssp = uri.getSchemeSpecificPart();
-      final int separatorIndex = ssp.indexOf("!/");
-      if (separatorIndex > 0) {
-        final String jarFileUri = ssp.substring(0, separatorIndex);
-        final String entryPath = ssp.substring(separatorIndex + 2);
-        return new String[] {jarFileUri, entryPath};
-      }
-    }
-    return null;
-  }
-
+  Set<ClassPathModelEntry> findModels(boolean reloadCache);
 }
diff --git 
a/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassgraphModelFinder.java
 
b/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassgraphModelFinder.java
new file mode 100644
index 00000000..cd3343a6
--- /dev/null
+++ 
b/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassgraphModelFinder.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.models;
+
+import java.net.URI;
+import java.util.Collections;
+import java.util.List;
+
+import io.github.classgraph.ClassGraph;
+import io.github.classgraph.ResourceList;
+import io.github.classgraph.ScanResult;
+
+/**
+ * Enables the detection of OpenNLP models in the classpath via classgraph.
+ * By default, this class will search for JAR files starting with 
"opennlp-models-*".
+ * This wildcard pattern can be adjusted by using the alternative constructor 
of this class.
+ */
+public class ClassgraphModelFinder extends AbstractClassPathModelFinder 
implements ClassPathModelFinder {
+
+  /**
+   * By default, it scans for "opennlp-models-*.jar".
+   */
+  public ClassgraphModelFinder() {
+    this(OPENNLP_MODEL_JAR_PREFIX);
+  }
+
+  /**
+   * @param modelJarPrefix The leafnames of the jars that should be scanned 
(e.g. "opennlp.jar").
+   *                       May contain a wildcard glob ("opennlp-*.jar"). It 
must not be {@code null}.
+   */
+  public ClassgraphModelFinder(String modelJarPrefix) {
+    super(modelJarPrefix);
+  }
+
+  @Override
+  protected Object getContext() {
+    return new 
ClassGraph().acceptJars(getJarModelPrefix()).disableDirScanning().scan();
+  }
+
+  @Override
+  protected List<URI> getMatchingURIs(String wildcardPattern, Object context) {
+    if (context instanceof ScanResult sr) {
+      try (sr; final ResourceList resources = 
sr.getResourcesMatchingWildcard(wildcardPattern)) {
+        return resources.getURIs();
+      }
+    }
+    return Collections.emptyList();
+  }
+
+}
diff --git 
a/opennlp-tools-models/src/main/java/opennlp/tools/models/SimpleClassPathModelFinder.java
 
b/opennlp-tools-models/src/main/java/opennlp/tools/models/SimpleClassPathModelFinder.java
new file mode 100644
index 00000000..61aa2dd5
--- /dev/null
+++ 
b/opennlp-tools-models/src/main/java/opennlp/tools/models/SimpleClassPathModelFinder.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.models;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.net.URI;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Enables the detection of OpenNLP models in the classpath via JDK classes.
+ * By default, this class will search for JAR files starting with 
"opennlp-models-*".
+ * This wildcard pattern can be adjusted by using the alternative constructor 
of this class.
+ */
+public class SimpleClassPathModelFinder extends AbstractClassPathModelFinder 
implements ClassPathModelFinder {
+
+  private static final Logger logger = 
LoggerFactory.getLogger(SimpleClassPathModelFinder.class);
+
+  /**
+   * By default, it scans for "opennlp-models-*.jar".
+   */
+  public SimpleClassPathModelFinder() {
+    this(OPENNLP_MODEL_JAR_PREFIX);
+  }
+
+  /**
+   * @param modelJarPrefix The leafnames of the jars that should be scanned 
(e.g. "opennlp.jar").
+   *                       May contain a wildcard glob ("opennlp-*.jar"). It 
must not be {@code null}.
+   */
+  public SimpleClassPathModelFinder(String modelJarPrefix) {
+    super(modelJarPrefix);
+  }
+  @Override
+  protected Object getContext() {
+    return null;
+  }
+
+  @Override
+  protected List<URI> getMatchingURIs(String wildcardPattern, Object context) {
+    if(wildcardPattern == null) {
+      return Collections.emptyList();
+    }
+    final ClassLoader cl = Thread.currentThread().getContextClassLoader();
+
+    // 1. Check if we have a URL classloader
+    if(cl instanceof URLClassLoader ucl) {
+      return Arrays.stream(ucl.getURLs())
+          .map(url -> {
+            try {
+              return url.toURI();
+            } catch (Exception ignored) {
+              // if we cannot convert a URL to a URI, we silently ignore it
+              // since we cannot read it from the classpath anyway.
+              return null;
+            }
+          })
+          .filter(Objects::nonNull)
+          .toList();
+    }
+
+    //2. Try to get URL Classpath via Reflection
+    final URL[] fromUcp = getURLs(cl);
+    if(fromUcp != null && fromUcp.length > 0) {
+      return Arrays.stream(fromUcp)
+          .map(url -> {
+            try {
+              return url.toURI();
+            } catch (Exception ignored) {
+              // if we cannot convert a URL to a URI, we silently ignore it
+              // since we cannot read it from the classpath anyway.
+              return null;
+            }
+          })
+          .filter(Objects::nonNull)
+          .toList();
+    }
+
+    //TODO look via system property
+    return null;
+  }
+
+  /*
+   * Java 9+ bridge to obtain the URLs of the classpath.
+   * This requires "--add-opens java.base/jdk.internal.loader=ALL-UNNAMED"
+   */
+  private URL[] getURLs(ClassLoader classLoader) {
+    URL[] urls = new URL[0];
+
+    try {
+      final Class builtinClazzLoader = 
Class.forName("jdk.internal.loader.BuiltinClassLoader");
+
+      if (builtinClazzLoader != null) {
+        final Field ucpField = builtinClazzLoader.getDeclaredField("ucp");
+        ucpField.setAccessible(true);
+
+        final Object ucpObject = ucpField.get(classLoader);
+        final Class clazz = Class.forName("jdk.internal.loader.URLClassPath");
+
+        if (clazz != null && ucpObject != null) {
+          final Method getURLs = clazz.getMethod("getURLs");
+
+          if (getURLs != null) {
+            urls = (URL[]) getURLs.invoke(ucpObject);
+          }
+        }
+      }
+
+    } catch(Exception e) {
+      logger.error("Could not obtain classpath URLs in Java 9+ - Exception 
was:");
+      logger.error(e.getLocalizedMessage(), e);
+    }
+    return urls;
+  }
+}
diff --git 
a/opennlp-tools-models/src/test/java/opennlp/tools/models/AbstractClassPathModelTest.java
 
b/opennlp-tools-models/src/test/java/opennlp/tools/models/AbstractClassPathModelTest.java
index 3d9fc1ab..9cfa1638 100644
--- 
a/opennlp-tools-models/src/test/java/opennlp/tools/models/AbstractClassPathModelTest.java
+++ 
b/opennlp-tools-models/src/test/java/opennlp/tools/models/AbstractClassPathModelTest.java
@@ -30,7 +30,7 @@ public abstract class AbstractClassPathModelTest {
 
   protected ClassPathModel getClassPathModel(String modelJarPrefix, boolean 
expectNotFound)
       throws IOException {
-    final ClassPathModelFinder finder = new 
ClassPathModelFinder(modelJarPrefix);
+    final ClassgraphModelFinder finder = new 
ClassgraphModelFinder(modelJarPrefix);
 
     final Set<ClassPathModelEntry> models = finder.findModels(false);
     assertNotNull(models);
diff --git 
a/opennlp-tools-models/src/test/java/opennlp/tools/models/ClassPathModelFinderTest.java
 
b/opennlp-tools-models/src/test/java/opennlp/tools/models/ClassPathModelFinderTest.java
index a10ef778..23b44b27 100644
--- 
a/opennlp-tools-models/src/test/java/opennlp/tools/models/ClassPathModelFinderTest.java
+++ 
b/opennlp-tools-models/src/test/java/opennlp/tools/models/ClassPathModelFinderTest.java
@@ -29,11 +29,11 @@ import static org.junit.jupiter.api.Assertions.assertNull;
 
 public class ClassPathModelFinderTest extends AbstractClassPathModelTest {
 
-  private ClassPathModelFinder finder;
+  private ClassgraphModelFinder finder;
 
   @BeforeEach
   public void prepare() {
-    finder = new ClassPathModelFinder();
+    finder = new ClassgraphModelFinder();
   }
 
   @Test
diff --git 
a/opennlp-tools-models/src/test/java/opennlp/tools/models/ClassPathModelFinderTest.java
 
b/opennlp-tools-models/src/test/java/opennlp/tools/models/SimpleClassPathModelFinderTest.java
similarity index 82%
copy from 
opennlp-tools-models/src/test/java/opennlp/tools/models/ClassPathModelFinderTest.java
copy to 
opennlp-tools-models/src/test/java/opennlp/tools/models/SimpleClassPathModelFinderTest.java
index a10ef778..80599466 100644
--- 
a/opennlp-tools-models/src/test/java/opennlp/tools/models/ClassPathModelFinderTest.java
+++ 
b/opennlp-tools-models/src/test/java/opennlp/tools/models/SimpleClassPathModelFinderTest.java
@@ -16,7 +16,6 @@
  */
 package opennlp.tools.models;
 
-import java.io.IOException;
 import java.util.Set;
 
 import org.junit.jupiter.api.BeforeEach;
@@ -25,15 +24,14 @@ import org.junit.jupiter.api.Test;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertNull;
 
-public class ClassPathModelFinderTest extends AbstractClassPathModelTest {
+public class SimpleClassPathModelFinderTest extends AbstractClassPathModelTest 
{
 
-  private ClassPathModelFinder finder;
+  private SimpleClassPathModelFinder finder;
 
   @BeforeEach
   public void prepare() {
-    finder = new ClassPathModelFinder();
+    finder = new SimpleClassPathModelFinder();
   }
 
   @Test
@@ -60,10 +58,4 @@ public class ClassPathModelFinderTest extends 
AbstractClassPathModelTest {
     assertEquals(reloadedModels, cacheReloadedModels);
   }
 
-  @Test
-  public void testFindOpenNLPModelsCustomPrefix() throws IOException {
-    final ClassPathModel model =
-        getClassPathModel("wont-find-anything*", true);
-    assertNull(model);
-  }
 }

Reply via email to