gianm commented on code in PR #12816:
URL: https://github.com/apache/druid/pull/12816#discussion_r935947078


##########
processing/src/main/java/org/apache/druid/guice/ExtensionsLoader.java:
##########
@@ -0,0 +1,328 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.guice;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.inject.Injector;
+import org.apache.commons.io.FileUtils;
+import org.apache.druid.initialization.DruidModule;
+import org.apache.druid.java.util.common.ISE;
+import org.apache.druid.java.util.common.logger.Logger;
+
+import javax.inject.Inject;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.ServiceLoader;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.stream.Collectors;
+
+public class ExtensionsLoader
+{
+  private static final Logger log = new Logger(ExtensionsLoader.class);
+
+  private final ExtensionsConfig extensionsConfig;
+  private final ConcurrentHashMap<File, URLClassLoader> loaders = new 
ConcurrentHashMap<>();
+
+  /**
+   * Map of loaded extensions, keyed by class (or interface).
+   */
+  private final ConcurrentHashMap<Class<?>, Collection<?>> extensions = new 
ConcurrentHashMap<>();
+
+  @Inject
+  public ExtensionsLoader(ExtensionsConfig config)
+  {
+    this.extensionsConfig = config;
+  }
+
+  public static ExtensionsLoader instance(Injector injector)
+  {
+    return injector.getInstance(ExtensionsLoader.class);
+  }
+
+  public ExtensionsConfig config()
+  {
+    return extensionsConfig;
+  }
+
+  /**
+   * @param clazz service class
+   * @param <T>   the service type
+   *
+   * @return Returns a collection of implementations loaded.
+   */
+  public <T> Collection<T> getLoadedImplementations(Class<T> clazz)
+  {
+    @SuppressWarnings("unchecked")
+    Collection<T> retVal = (Collection<T>) extensions.get(clazz);
+    if (retVal == null) {
+      return new HashSet<>();
+    }
+    return retVal;
+  }
+
+  /**
+   * @return Returns a collection of implementations loaded.
+   */
+  public Collection<DruidModule> getLoadedModules()
+  {
+    return getLoadedImplementations(DruidModule.class);
+  }
+
+  @VisibleForTesting
+  public Map<File, URLClassLoader> getLoadersMap()
+  {
+    return loaders;
+  }
+
+  /**
+   * Look for implementations for the given class from both classpath and 
extensions directory, using {@link
+   * ServiceLoader}. A user should never put the same two extensions in 
classpath and extensions directory, if he/she
+   * does that, the one that is in the classpath will be loaded, the other 
will be ignored.
+   *
+   * @param serviceClass The class to look the implementations of (e.g., 
DruidModule)
+   *
+   * @return A collection that contains implementations (of distinct concrete 
classes) of the given class. The order of
+   * elements in the returned collection is not specified and not guaranteed 
to be the same for different calls to
+   * getFromExtensions().
+   */
+  @SuppressWarnings("unchecked")
+  public <T> Collection<T> getFromExtensions(Class<T> serviceClass)
+  {
+    // Classes classes are loaded once upon first request. Since the class 
path does
+    // not change during a run, the set of extension classes cannot change once
+    // computed.
+    //
+    // In practice, it appears the only place this matters is with DruidModule:
+    // initialization gets the list of extensions, and two REST API calls later
+    // ask for the same list.
+    Collection<?> modules = extensions.computeIfAbsent(
+        serviceClass,
+        serviceC -> new ServiceLoadingFromExtensions<>(serviceC).implsToLoad
+    );
+    //noinspection unchecked
+    return (Collection<T>) modules;
+  }
+
+  public Collection<DruidModule> getModules()
+  {
+    return getFromExtensions(DruidModule.class);
+  }
+
+  /**
+   * Find all the extension files that should be loaded by druid.
+   * <p/>
+   * If user explicitly specifies druid.extensions.loadList, then it will look 
for those extensions under root
+   * extensions directory. If one of them is not found, druid will fail loudly.
+   * <p/>
+   * If user doesn't specify druid.extension.toLoad (or its value is empty), 
druid will load all the extensions
+   * under the root extensions directory.
+   *
+   * @return an array of druid extension files that will be loaded by druid 
process
+   */
+  public File[] getExtensionFilesToLoad()
+  {
+    final File rootExtensionsDir = new File(extensionsConfig.getDirectory());
+    if (rootExtensionsDir.exists() && !rootExtensionsDir.isDirectory()) {
+      throw new ISE("Root extensions directory [%s] is not a directory!?", 
rootExtensionsDir);
+    }
+    File[] extensionsToLoad;
+    final LinkedHashSet<String> toLoad = extensionsConfig.getLoadList();
+    if (toLoad == null) {
+      extensionsToLoad = rootExtensionsDir.listFiles();
+    } else {
+      int i = 0;
+      extensionsToLoad = new File[toLoad.size()];
+      for (final String extensionName : toLoad) {
+        File extensionDir = new File(extensionName);
+        if (!extensionDir.isAbsolute()) {
+          extensionDir = new File(rootExtensionsDir, extensionName);
+        }
+
+        if (!extensionDir.isDirectory()) {
+          throw new ISE(
+              "Extension [%s] specified in \"druid.extensions.loadList\" 
didn't exist!?",
+              extensionDir.getAbsolutePath()
+          );
+        }
+        extensionsToLoad[i++] = extensionDir;
+      }
+    }
+    return extensionsToLoad == null ? new File[]{} : extensionsToLoad;
+  }
+
+  /**
+   * @param extension The File instance of the extension we want to load
+   *
+   * @return a URLClassLoader that loads all the jars on which the extension 
is dependent
+   */
+  public URLClassLoader getClassLoaderForExtension(File extension, boolean 
useExtensionClassloaderFirst)
+  {
+    return loaders.computeIfAbsent(

Review Comment:
   Yeah, I agree this is all weird. From memory, I added the feature in #5321 
after struggling with dependency problems with certain extensions and Hadoop 
jobs and not having come up with a better solution. I wish I had been able to 
come up with a better solution at the time, because it's a goofy feature. (I 
don't use it anymore on any clusters I'm currently involved with.)
   
   At any rate, that same patch #5321 introduced the bad caching code. I should 
have changed the key of the loader map from `File` to `Pair<File, Boolean>`. Do 
you mind doing that now? You'll save me a bit of time, since otherwise I'd feel 
compelled to do it after you file this patch. It's more correct behavior for 
the function, and it won't change behavior in the normal case where the 
server-wide value of `useExtensionClassloaderFirst` is `false`. (All other 
calls pass in an explicit `false`.) So I think the risk is low.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to