This is an automated email from the ASF dual-hosted git repository. rzo1 pushed a commit to branch opennlp-models-optional-classgraph-wip in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit 3e0abede175c5e6ad9de656a1b7d3833becc97ac Author: Richard Zowalla <[email protected]> AuthorDate: Fri Jun 14 12:15:47 2024 +0200 wip --- ...nder.java => AbstractClassPathModelFinder.java} | 83 +++++++------ .../opennlp/tools/models/ClassPathModelFinder.java | 95 +------------- .../tools/models/ClassgraphModelFinder.java | 64 ++++++++++ .../tools/models/SimpleClassPathModelFinder.java | 136 +++++++++++++++++++++ .../tools/models/AbstractClassPathModelTest.java | 2 +- .../tools/models/ClassPathModelFinderTest.java | 4 +- ...st.java => SimpleClassPathModelFinderTest.java} | 14 +-- 7 files changed, 253 insertions(+), 145 deletions(-) diff --git a/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassPathModelFinder.java b/opennlp-tools-models/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java similarity index 58% copy from opennlp-tools-models/src/main/java/opennlp/tools/models/ClassPathModelFinder.java copy to opennlp-tools-models/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java index 18b527f6..b82207a2 100644 --- a/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassPathModelFinder.java +++ b/opennlp-tools-models/src/main/java/opennlp/tools/models/AbstractClassPathModelFinder.java @@ -23,78 +23,76 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; -import io.github.classgraph.ClassGraph; -import io.github.classgraph.ResourceList; -import io.github.classgraph.ScanResult; - - /** * Enables the detection of OpenNLP models in the classpath. By default, this class will search * for JAR files starting with "opennlp-models-*". This wildcard pattern can be adjusted by * using the alternative constructor of this class. */ -public class ClassPathModelFinder { +public abstract class AbstractClassPathModelFinder implements ClassPathModelFinder { - private static final String OPENNLP_MODEL_JAR_PREFIX = "opennlp-models-*.jar"; private static final String JAR = "jar"; + private final String jarModelPrefix; private Set<ClassPathModelEntry> models; /** * By default, it scans for "opennlp-models-*.jar". */ - public ClassPathModelFinder() { + public AbstractClassPathModelFinder() { this(OPENNLP_MODEL_JAR_PREFIX); } /** - * @param modelJarPrefix The leafnames of the jars that should be canned (e.g. "opennlp.jar"). + * @param jarModelPrefix The leafnames of the jars that should be canned (e.g. "opennlp.jar"). * May contain a wildcard glob ("opennlp-*.jar"). It must not be {@code null}. */ - public ClassPathModelFinder(String modelJarPrefix) { - Objects.requireNonNull(modelJarPrefix, "modelJarPrefix must not be null"); - this.jarModelPrefix = modelJarPrefix; + public AbstractClassPathModelFinder(String jarModelPrefix) { + Objects.requireNonNull(jarModelPrefix, "modelJarPrefix must not be null"); + this.jarModelPrefix = jarModelPrefix; } - /** - * Finds OpenNLP models within the classpath. - * - * @param reloadCache {@code true}, if the internal cache should explicitly be reloaded - * @return A Set of {@link ClassPathModelEntry ClassPathModelEntries}. It might be empty. - */ + @Override public Set<ClassPathModelEntry> findModels(boolean reloadCache) { if (this.models == null || reloadCache) { - try (ScanResult sr = new ClassGraph().acceptJars(jarModelPrefix).disableDirScanning().scan()) { + final List<URI> classpathModels = getMatchingURIs("*.bin", getContext()); + final List<URI> classPathProperties = getMatchingURIs("model.properties", getContext()); - final List<URI> classpathModels = getResourcesMatchingWildcard(sr, "*.bin"); - final List<URI> classPathProperties = getResourcesMatchingWildcard(sr, "model.properties"); + this.models = new HashSet<>(); - this.models = new HashSet<>(); - - for (URI model : classpathModels) { - URI m = null; - for (URI prop : classPathProperties) { - if (jarPathsMatch(model, prop)) { - m = prop; - break; - } + for (URI model : classpathModels) { + URI m = null; + for (URI prop : classPathProperties) { + if (jarPathsMatch(model, prop)) { + m = prop; + break; } - this.models.add(new ClassPathModelEntry(model, Optional.ofNullable(m))); - } + this.models.add(new ClassPathModelEntry(model, Optional.ofNullable(m))); + } } return this.models; } - private List<URI> getResourcesMatchingWildcard(final ScanResult sr, final String resourceWildcard) { - try (final ResourceList resources = sr.getResourcesMatchingWildcard(resourceWildcard)) { - return resources.getURIs(); - } - } + /** + * Subclasses can implement this method to provide additional context to + * {@link AbstractClassPathModelFinder#getMatchingURIs(String, Object)}. + * + * @return a context information. May be {@code null}. + */ + protected abstract Object getContext(); + + /** + * Return matching classpath URIs for the given pattern. + * + * @param wildcardPattern the pattern. Must not be {@code null}. + * @param context an object holding context information. It might be {@code null}. + * @return a list of matching classpath URIs. + */ + protected abstract List<URI> getMatchingURIs(String wildcardPattern, Object context); - private boolean jarPathsMatch(URI uri1, URI uri2) { + protected boolean jarPathsMatch(URI uri1, URI uri2) { final String[] parts1 = parseJarURI(uri1); final String[] parts2 = parseJarURI(uri2); @@ -105,7 +103,7 @@ public class ClassPathModelFinder { return parts1[0].equals(parts2[0]); } - private String[] parseJarURI(URI uri) { + protected String[] parseJarURI(URI uri) { if (JAR.equals(uri.getScheme())) { final String ssp = uri.getSchemeSpecificPart(); final int separatorIndex = ssp.indexOf("!/"); @@ -118,4 +116,11 @@ public class ClassPathModelFinder { return null; } + protected String getJarModelPrefix() { + return jarModelPrefix; + } + + protected Set<ClassPathModelEntry> getModels() { + return models; + } } diff --git a/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassPathModelFinder.java b/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassPathModelFinder.java index 18b527f6..5603d325 100644 --- a/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassPathModelFinder.java +++ b/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassPathModelFinder.java @@ -16,45 +16,11 @@ */ package opennlp.tools.models; -import java.net.URI; -import java.util.HashSet; -import java.util.List; -import java.util.Objects; -import java.util.Optional; import java.util.Set; -import io.github.classgraph.ClassGraph; -import io.github.classgraph.ResourceList; -import io.github.classgraph.ScanResult; +public interface ClassPathModelFinder { - -/** - * Enables the detection of OpenNLP models in the classpath. By default, this class will search - * for JAR files starting with "opennlp-models-*". This wildcard pattern can be adjusted by - * using the alternative constructor of this class. - */ -public class ClassPathModelFinder { - - private static final String OPENNLP_MODEL_JAR_PREFIX = "opennlp-models-*.jar"; - private static final String JAR = "jar"; - private final String jarModelPrefix; - private Set<ClassPathModelEntry> models; - - /** - * By default, it scans for "opennlp-models-*.jar". - */ - public ClassPathModelFinder() { - this(OPENNLP_MODEL_JAR_PREFIX); - } - - /** - * @param modelJarPrefix The leafnames of the jars that should be canned (e.g. "opennlp.jar"). - * May contain a wildcard glob ("opennlp-*.jar"). It must not be {@code null}. - */ - public ClassPathModelFinder(String modelJarPrefix) { - Objects.requireNonNull(modelJarPrefix, "modelJarPrefix must not be null"); - this.jarModelPrefix = modelJarPrefix; - } + String OPENNLP_MODEL_JAR_PREFIX = "opennlp-models-*.jar"; /** * Finds OpenNLP models within the classpath. @@ -62,60 +28,5 @@ public class ClassPathModelFinder { * @param reloadCache {@code true}, if the internal cache should explicitly be reloaded * @return A Set of {@link ClassPathModelEntry ClassPathModelEntries}. It might be empty. */ - public Set<ClassPathModelEntry> findModels(boolean reloadCache) { - - if (this.models == null || reloadCache) { - try (ScanResult sr = new ClassGraph().acceptJars(jarModelPrefix).disableDirScanning().scan()) { - - final List<URI> classpathModels = getResourcesMatchingWildcard(sr, "*.bin"); - final List<URI> classPathProperties = getResourcesMatchingWildcard(sr, "model.properties"); - - this.models = new HashSet<>(); - - for (URI model : classpathModels) { - URI m = null; - for (URI prop : classPathProperties) { - if (jarPathsMatch(model, prop)) { - m = prop; - break; - } - } - this.models.add(new ClassPathModelEntry(model, Optional.ofNullable(m))); - - } - } - } - return this.models; - } - - private List<URI> getResourcesMatchingWildcard(final ScanResult sr, final String resourceWildcard) { - try (final ResourceList resources = sr.getResourcesMatchingWildcard(resourceWildcard)) { - return resources.getURIs(); - } - } - - private boolean jarPathsMatch(URI uri1, URI uri2) { - final String[] parts1 = parseJarURI(uri1); - final String[] parts2 = parseJarURI(uri2); - - if (parts1 == null || parts2 == null) { - return false; - } - - return parts1[0].equals(parts2[0]); - } - - private String[] parseJarURI(URI uri) { - if (JAR.equals(uri.getScheme())) { - final String ssp = uri.getSchemeSpecificPart(); - final int separatorIndex = ssp.indexOf("!/"); - if (separatorIndex > 0) { - final String jarFileUri = ssp.substring(0, separatorIndex); - final String entryPath = ssp.substring(separatorIndex + 2); - return new String[] {jarFileUri, entryPath}; - } - } - return null; - } - + Set<ClassPathModelEntry> findModels(boolean reloadCache); } diff --git a/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassgraphModelFinder.java b/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassgraphModelFinder.java new file mode 100644 index 00000000..cd3343a6 --- /dev/null +++ b/opennlp-tools-models/src/main/java/opennlp/tools/models/ClassgraphModelFinder.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package opennlp.tools.models; + +import java.net.URI; +import java.util.Collections; +import java.util.List; + +import io.github.classgraph.ClassGraph; +import io.github.classgraph.ResourceList; +import io.github.classgraph.ScanResult; + +/** + * Enables the detection of OpenNLP models in the classpath via classgraph. + * By default, this class will search for JAR files starting with "opennlp-models-*". + * This wildcard pattern can be adjusted by using the alternative constructor of this class. + */ +public class ClassgraphModelFinder extends AbstractClassPathModelFinder implements ClassPathModelFinder { + + /** + * By default, it scans for "opennlp-models-*.jar". + */ + public ClassgraphModelFinder() { + this(OPENNLP_MODEL_JAR_PREFIX); + } + + /** + * @param modelJarPrefix The leafnames of the jars that should be canned (e.g. "opennlp.jar"). + * May contain a wildcard glob ("opennlp-*.jar"). It must not be {@code null}. + */ + public ClassgraphModelFinder(String modelJarPrefix) { + super(modelJarPrefix); + } + + @Override + protected Object getContext() { + return new ClassGraph().acceptJars(getJarModelPrefix()).disableDirScanning().scan(); + } + + @Override + protected List<URI> getMatchingURIs(String wildcardPattern, Object context) { + if (context instanceof ScanResult sr) { + try (sr; final ResourceList resources = sr.getResourcesMatchingWildcard(wildcardPattern)) { + return resources.getURIs(); + } + } + return Collections.emptyList(); + } + +} diff --git a/opennlp-tools-models/src/main/java/opennlp/tools/models/SimpleClassPathModelFinder.java b/opennlp-tools-models/src/main/java/opennlp/tools/models/SimpleClassPathModelFinder.java new file mode 100644 index 00000000..61aa2dd5 --- /dev/null +++ b/opennlp-tools-models/src/main/java/opennlp/tools/models/SimpleClassPathModelFinder.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package opennlp.tools.models; + +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.net.URI; +import java.net.URL; +import java.net.URLClassLoader; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Enables the detection of OpenNLP models in the classpath via JDK classes + * By default, this class will search for JAR files starting with "opennlp-models-*". + * This wildcard pattern can be adjusted by using the alternative constructor of this class. + */ +public class SimpleClassPathModelFinder extends AbstractClassPathModelFinder implements ClassPathModelFinder { + + private static final Logger logger = LoggerFactory.getLogger(SimpleClassPathModelFinder.class); + + /** + * By default, it scans for "opennlp-models-*.jar". + */ + public SimpleClassPathModelFinder() { + this(OPENNLP_MODEL_JAR_PREFIX); + } + + /** + * @param modelJarPrefix The leafnames of the jars that should be canned (e.g. "opennlp.jar"). + * May contain a wildcard glob ("opennlp-*.jar"). It must not be {@code null}. + */ + public SimpleClassPathModelFinder(String modelJarPrefix) { + super(modelJarPrefix); + } + @Override + protected Object getContext() { + return null; + } + + @Override + protected List<URI> getMatchingURIs(String wildcardPattern, Object context) { + if(wildcardPattern == null) { + return Collections.emptyList(); + } + final ClassLoader cl = Thread.currentThread().getContextClassLoader(); + + // 1. Check if we have a URL classloader + if(cl instanceof URLClassLoader ucl) { + return Arrays.stream(ucl.getURLs()) + .map(url -> { + try { + return url.toURI(); + } catch (Exception ignored) { + // if we cannot convert an url to an uri, we silently ignore it + // since we cannot read it from the classpath anyway. + return null; + } + }) + .filter(Objects::nonNull) + .toList(); + } + + //2. Try to get URL Classpath via Reflection + final URL[] fromUcp = getURLs(cl); + if(fromUcp != null && fromUcp.length > 0) { + return Arrays.stream(fromUcp) + .map(url -> { + try { + return url.toURI(); + } catch (Exception ignored) { + // if we cannot convert an url to an uri, we silently ignore it + // since we cannot read it from the classpath anyway. + return null; + } + }) + .filter(Objects::nonNull) + .toList(); + } + + //TODO look via system property + return null; + } + + /* + * Java 9 + Bridge to obtain URLs from classpath... + * This requires "--add-opens java.base/jdk.internal.loader=ALL-UNNAMED" + */ + private URL[] getURLs(ClassLoader classLoader) { + URL[] urls = new URL[0]; + + try { + final Class builtinClazzLoader = Class.forName("jdk.internal.loader.BuiltinClassLoader"); + + if (builtinClazzLoader != null) { + final Field ucpField = builtinClazzLoader.getDeclaredField("ucp"); + ucpField.setAccessible(true); + + final Object ucpObject = ucpField.get(classLoader); + final Class clazz = Class.forName("jdk.internal.loader.URLClassPath"); + + if (clazz != null && ucpObject != null) { + final Method getURLs = clazz.getMethod("getURLs"); + + if (getURLs != null) { + urls = (URL[]) getURLs.invoke(ucpObject); + } + } + } + + } catch(Exception e) { + logger.error("Could not obtain classpath URLs in Java 9+ - Exception was:"); + logger.error(e.getLocalizedMessage(), e); + } + return urls; + } +} diff --git a/opennlp-tools-models/src/test/java/opennlp/tools/models/AbstractClassPathModelTest.java b/opennlp-tools-models/src/test/java/opennlp/tools/models/AbstractClassPathModelTest.java index 3d9fc1ab..9cfa1638 100644 --- a/opennlp-tools-models/src/test/java/opennlp/tools/models/AbstractClassPathModelTest.java +++ b/opennlp-tools-models/src/test/java/opennlp/tools/models/AbstractClassPathModelTest.java @@ -30,7 +30,7 @@ public abstract class AbstractClassPathModelTest { protected ClassPathModel getClassPathModel(String modelJarPrefix, boolean expectNotFound) throws IOException { - final ClassPathModelFinder finder = new ClassPathModelFinder(modelJarPrefix); + final ClassgraphModelFinder finder = new ClassgraphModelFinder(modelJarPrefix); final Set<ClassPathModelEntry> models = finder.findModels(false); assertNotNull(models); diff --git a/opennlp-tools-models/src/test/java/opennlp/tools/models/ClassPathModelFinderTest.java b/opennlp-tools-models/src/test/java/opennlp/tools/models/ClassPathModelFinderTest.java index a10ef778..23b44b27 100644 --- a/opennlp-tools-models/src/test/java/opennlp/tools/models/ClassPathModelFinderTest.java +++ b/opennlp-tools-models/src/test/java/opennlp/tools/models/ClassPathModelFinderTest.java @@ -29,11 +29,11 @@ import static org.junit.jupiter.api.Assertions.assertNull; public class ClassPathModelFinderTest extends AbstractClassPathModelTest { - private ClassPathModelFinder finder; + private ClassgraphModelFinder finder; @BeforeEach public void prepare() { - finder = new ClassPathModelFinder(); + finder = new ClassgraphModelFinder(); } @Test diff --git a/opennlp-tools-models/src/test/java/opennlp/tools/models/ClassPathModelFinderTest.java b/opennlp-tools-models/src/test/java/opennlp/tools/models/SimpleClassPathModelFinderTest.java similarity index 82% copy from opennlp-tools-models/src/test/java/opennlp/tools/models/ClassPathModelFinderTest.java copy to opennlp-tools-models/src/test/java/opennlp/tools/models/SimpleClassPathModelFinderTest.java index a10ef778..80599466 100644 --- a/opennlp-tools-models/src/test/java/opennlp/tools/models/ClassPathModelFinderTest.java +++ b/opennlp-tools-models/src/test/java/opennlp/tools/models/SimpleClassPathModelFinderTest.java @@ -16,7 +16,6 @@ */ package opennlp.tools.models; -import java.io.IOException; import java.util.Set; import org.junit.jupiter.api.BeforeEach; @@ -25,15 +24,14 @@ import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -public class ClassPathModelFinderTest extends AbstractClassPathModelTest { +public class SimpleClassPathModelFinderTest extends AbstractClassPathModelTest { - private ClassPathModelFinder finder; + private SimpleClassPathModelFinder finder; @BeforeEach public void prepare() { - finder = new ClassPathModelFinder(); + finder = new SimpleClassPathModelFinder(); } @Test @@ -60,10 +58,4 @@ public class ClassPathModelFinderTest extends AbstractClassPathModelTest { assertEquals(reloadedModels, cacheReloadedModels); } - @Test - public void testFindOpenNLPModelsCustomPrefix() throws IOException { - final ClassPathModel model = - getClassPathModel("wont-find-anything*", true); - assertNull(model); - } }
