This is an automated email from the ASF dual-hosted git repository. snagel pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
commit 1f5f3e4d42b8dfb8bf741b11c9f39cc8bcd34091 Author: Sebastian Nagel <sna...@apache.org> AuthorDate: Thu May 19 15:26:46 2022 +0200 NUTCH-2936 Early registration of URL stream handlers provided by plugins may fail Hadoop jobs running in distributed mode if protocol-okhttp is used - code improvements Nutch plugin system: - use `Class<?>` and remove suppressions of warnings - javadocs: fix typos - remove superfluous white space - autoformat using code style template --- src/java/org/apache/nutch/plugin/Extension.java | 14 ++++---- src/java/org/apache/nutch/plugin/Plugin.java | 2 +- .../org/apache/nutch/plugin/PluginRepository.java | 22 ++++++------ .../nutch/plugin/URLStreamHandlerFactory.java | 41 ++++++++++++---------- 4 files changed, 40 insertions(+), 39 deletions(-) diff --git a/src/java/org/apache/nutch/plugin/Extension.java b/src/java/org/apache/nutch/plugin/Extension.java index 246e8ff7b..e949ea317 100644 --- a/src/java/org/apache/nutch/plugin/Extension.java +++ b/src/java/org/apache/nutch/plugin/Extension.java @@ -143,15 +143,15 @@ public class Extension { * Return an instance of the extension implementation. Before we create a * extension instance we startup the plugin if it is not already done. The * plugin instance and the extension instance use the same - * {@link org.apache.nutch.plugin.PluginClassLoader}. - * Each Plugin use its own classloader. The - * {@link org.apache.nutch.plugin.PluginClassLoader} knows only its own - * <i>plugin runtime libraries</i> defined - * in the <code>plugin.xml</code> manifest file and exported libraries - * of the dependent plugins. + * {@link org.apache.nutch.plugin.PluginClassLoader}. Each Plugin uses its own + * classloader. The {@link org.apache.nutch.plugin.PluginClassLoader} knows + * only its own <i>plugin runtime libraries</i> defined in the + * <code>plugin.xml</code> manifest file and exported libraries of the + * dependent plugins. * * @return Object An instance of the extension implementation - * @throws PluginRuntimeException if there is a fatal runtime error + * @throws PluginRuntimeException + * if there is a fatal runtime error */ public Object getExtensionInstance() throws PluginRuntimeException { // Must synchronize here to make sure creation and initialization diff --git a/src/java/org/apache/nutch/plugin/Plugin.java b/src/java/org/apache/nutch/plugin/Plugin.java index 314a8669d..306ada3d4 100644 --- a/src/java/org/apache/nutch/plugin/Plugin.java +++ b/src/java/org/apache/nutch/plugin/Plugin.java @@ -27,7 +27,7 @@ import org.apache.hadoop.conf.Configuration; * provide a API and invoke one or a set of installed extensions. * * Each plugin may extend the base <code>Plugin</code>. <code>Plugin</code> - * instances are used as the point of life cycle managemet of plugin related + * instances are used as the point of life cycle management of plugin related * functionality. * * The <code>Plugin</code> will be started up and shutdown by the nutch plugin diff --git a/src/java/org/apache/nutch/plugin/PluginRepository.java b/src/java/org/apache/nutch/plugin/PluginRepository.java index 3c554094b..1eec0ffc8 100644 --- a/src/java/org/apache/nutch/plugin/PluginRepository.java +++ b/src/java/org/apache/nutch/plugin/PluginRepository.java @@ -38,11 +38,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * <p>The plugin repositority is a registry of all plugins.</p> + * <p>The plugin repository is a registry of all plugins.</p> * - * <p>At system boot up a repositority is built by parsing the mainifest files of + * <p>At system boot up a repository is built by parsing the manifest files of * all plugins. Plugins that require other plugins which do not exist are not - * registed. For each plugin a plugin descriptor instance will be created. The + * registered. For each plugin a plugin descriptor instance will be created. The * descriptor represents all meta information about a plugin. So a plugin * instance will be created later when it is required, this allow lazy plugin * loading.</p> @@ -64,8 +64,7 @@ public class PluginRepository implements URLStreamHandlerFactory { private HashMap<String, Plugin> fActivatedPlugins; - @SuppressWarnings("rawtypes") - private static final Map<String, Map<PluginClassLoader, Class>> CLASS_CACHE = new HashMap<>(); + private static final Map<String, Map<PluginClassLoader, Class<?>>> CLASS_CACHE = new HashMap<>(); private Configuration conf; @@ -267,14 +266,14 @@ public class PluginRepository implements URLStreamHandlerFactory { } /** - * <p>Returns a instance of a plugin. Plugin instances are cached. So a plugin - * exist only as one instance. This allow a central management of plugin own + * <p>Returns an instance of a plugin. Plugin instances are cached. So a plugin + * exist only as one instance. This allow a central management of plugin's own * resources.</p> * * <p>After creating the plugin instance the startUp() method is invoked. The * plugin use a own classloader that is used as well by all instance of * extensions of the same plugin. This class loader use all exported libraries - * from the dependend plugins and all plugin libraries.</p> + * from the dependent plugins and all plugin libraries.</p> * * @param pDescriptor a {@link PluginDescriptor} for which to retrieve a * {@link Plugin} instance @@ -337,16 +336,15 @@ public class PluginRepository implements URLStreamHandlerFactory { } } - @SuppressWarnings("rawtypes") - public static Class getCachedClass(PluginDescriptor pDescriptor, String className) + public Class<?> getCachedClass(PluginDescriptor pDescriptor, String className) throws ClassNotFoundException { - Map<PluginClassLoader, Class> descMap = CLASS_CACHE.get(className); + Map<PluginClassLoader, Class<?>> descMap = CLASS_CACHE.get(className); if (descMap == null) { descMap = new HashMap<>(); CLASS_CACHE.put(className, descMap); } PluginClassLoader loader = pDescriptor.getClassLoader(); - Class clazz = descMap.get(loader); + Class<?> clazz = descMap.get(loader); if (clazz == null) { clazz = loader.loadClass(className); descMap.put(loader, clazz); diff --git a/src/java/org/apache/nutch/plugin/URLStreamHandlerFactory.java b/src/java/org/apache/nutch/plugin/URLStreamHandlerFactory.java index 5aed76a35..6c79fe9e6 100644 --- a/src/java/org/apache/nutch/plugin/URLStreamHandlerFactory.java +++ b/src/java/org/apache/nutch/plugin/URLStreamHandlerFactory.java @@ -35,13 +35,13 @@ import org.slf4j.LoggerFactory; */ public class URLStreamHandlerFactory implements java.net.URLStreamHandlerFactory { - + protected static final Logger LOG = LoggerFactory .getLogger(URLStreamHandlerFactory.class); - + /** The singleton instance. */ private static URLStreamHandlerFactory instance; - + /** Here we register all PluginRepositories. * In this class we do not know why several instances of PluginRepository * are kept, nor do we know how long they will be used. To prevent @@ -51,64 +51,67 @@ public class URLStreamHandlerFactory * outdated references which is done in the {@link #removeInvalidRefs()} method. */ private ArrayList<WeakReference<PluginRepository>> prs; - + static { instance = new URLStreamHandlerFactory(); URL.setURLStreamHandlerFactory(instance); LOG.debug("Registered URLStreamHandlerFactory with the JVM."); } - + private URLStreamHandlerFactory() { this.prs = new ArrayList<>(); } /** * Get the singleton instance of this class. - * @return a {@link org.apache.nutch.plugin.URLStreamHandlerFactory} instance + * @return a {@link org.apache.nutch.plugin.URLStreamHandlerFactory} instance */ public static URLStreamHandlerFactory getInstance() { return instance; } - + /** Use this method once a new PluginRepository was created to register it. * * @param pr The PluginRepository to be registered. */ public void registerPluginRepository(PluginRepository pr) { this.prs.add(new WeakReference<PluginRepository>(pr)); - + removeInvalidRefs(); } @Override public URLStreamHandler createURLStreamHandler(String protocol) { LOG.debug("Creating URLStreamHandler for protocol: {}", protocol); - + removeInvalidRefs(); - + // find the 'correct' PluginRepository. For now we simply take the first. // then ask it to return the URLStreamHandler - for(WeakReference<PluginRepository> ref: this.prs) { + for (WeakReference<PluginRepository> ref : this.prs) { PluginRepository pr = ref.get(); - if(pr != null) { + if (pr != null) { // found PluginRepository. Let's get the URLStreamHandler... - return pr.createURLStreamHandler(protocol); + URLStreamHandler handler = pr.createURLStreamHandler(protocol); + return handler; } } + return null; } - /** Maintains the list of PluginRepositories by - * removing the references whose referents have been - * garbage collected meanwhile. + /** + * Maintains the list of PluginRepositories by removing the references whose + * referents have been garbage collected meanwhile. */ private void removeInvalidRefs() { ArrayList<WeakReference<PluginRepository>> copy = new ArrayList<>(this.prs); - for(WeakReference<PluginRepository> ref: copy) { - if(ref.get() == null) { + for (WeakReference<PluginRepository> ref : copy) { + if (ref.get() == null) { this.prs.remove(ref); } } - LOG.debug("Removed '{}' invalid references. '{}' remaining.", copy.size()-this.prs.size(), this.prs.size()); + LOG.debug("Removed '{}' invalid references. '{}' remaining.", + copy.size() - this.prs.size(), this.prs.size()); } }