[
https://issues.apache.org/jira/browse/DRILL-3742?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14955921#comment-14955921
]
ASF GitHub Bot commented on DRILL-3742:
---------------------------------------
Github user julienledem commented on a diff in the pull request:
https://github.com/apache/drill/pull/148#discussion_r41939660
--- Diff:
common/src/main/java/org/apache/drill/common/scanner/ClassPathScanner.java ---
@@ -0,0 +1,458 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.common.scanner;
+
+import static java.lang.String.format;
+import static java.util.Arrays.asList;
+import static java.util.Collections.unmodifiableList;
+import static java.util.Collections.unmodifiableSet;
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.drill.common.config.CommonConstants;
+import org.apache.drill.common.config.DrillConfig;
+import org.apache.drill.common.scanner.persistence.AnnotationDescriptor;
+import org.apache.drill.common.scanner.persistence.AttributeDescriptor;
+import org.apache.drill.common.scanner.persistence.ChildClassDescriptor;
+import org.apache.drill.common.scanner.persistence.FieldDescriptor;
+import org.apache.drill.common.scanner.persistence.FunctionDescriptor;
+import org.apache.drill.common.scanner.persistence.ParentClassDescriptor;
+import org.apache.drill.common.scanner.persistence.ScanResult;
+import org.reflections.Reflections;
+import org.reflections.adapters.JavassistAdapter;
+import org.reflections.scanners.AbstractScanner;
+import org.reflections.util.ConfigurationBuilder;
+import org.reflections.util.FilterBuilder;
+
+import com.google.common.base.Stopwatch;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Multimap;
+import com.google.common.collect.Sets;
+
+import javassist.bytecode.AccessFlag;
+import javassist.bytecode.AnnotationsAttribute;
+import javassist.bytecode.ClassFile;
+import javassist.bytecode.FieldInfo;
+import javassist.bytecode.annotation.AnnotationMemberValue;
+import javassist.bytecode.annotation.ArrayMemberValue;
+import javassist.bytecode.annotation.BooleanMemberValue;
+import javassist.bytecode.annotation.ByteMemberValue;
+import javassist.bytecode.annotation.CharMemberValue;
+import javassist.bytecode.annotation.ClassMemberValue;
+import javassist.bytecode.annotation.DoubleMemberValue;
+import javassist.bytecode.annotation.EnumMemberValue;
+import javassist.bytecode.annotation.FloatMemberValue;
+import javassist.bytecode.annotation.IntegerMemberValue;
+import javassist.bytecode.annotation.LongMemberValue;
+import javassist.bytecode.annotation.MemberValue;
+import javassist.bytecode.annotation.MemberValueVisitor;
+import javassist.bytecode.annotation.ShortMemberValue;
+import javassist.bytecode.annotation.StringMemberValue;
+
+/**
+ * Classpath scanning utility.
+ * The classpath should be scanned once at startup from a DrillConfig
instance. {@see ClassPathScanner#fromPrescan(DrillConfig)}
+ * The DrillConfig provides the list of packages to scan.
(drill.classpath.scanning.packages) {@see
CommonConstants#IMPLEMENTATIONS_SCAN_PACKAGES}
+ * Only the class directories and jars containing a drill-module.conf will
be scanned.
+ * Drill core packages are scanned at build time and the result is saved
in a JSON file. {@see ClassPathScanner#FUNCTION_REGISTRY_FILE}
+ * At runtime only the packages that have not been scanned yet will be
scanned.
+ */
+public final class ClassPathScanner {
+ private static final org.slf4j.Logger logger =
org.slf4j.LoggerFactory.getLogger(ClassPathScanner.class);
+ private static final JavassistAdapter METADATA_ADAPTER = new
JavassistAdapter();
+
+ /**
+ * defines all the things to scan for
+ */
+ private static final Set<String> SCANNED_CLASSES = unmodifiableSet(
+ new HashSet<>(asList(
+ "org.apache.drill.common.logical.data.LogicalOperator",
+ "org.apache.drill.common.logical.FormatPluginConfig",
+ "org.apache.drill.common.logical.StoragePluginConfig",
+ "org.apache.drill.exec.expr.DrillFunc",
+ "org.apache.drill.exec.expr.fn.PluggableFunctionRegistry",
+ "org.apache.drill.exec.physical.base.PhysicalOperator",
+ "org.apache.drill.exec.physical.impl.BatchCreator",
+ "org.apache.drill.exec.physical.impl.RootCreator",
+ "org.apache.drill.exec.rpc.user.security.UserAuthenticator",
+ "org.apache.drill.exec.store.dfs.FormatPlugin",
+ "org.apache.drill.exec.store.StoragePlugin",
+ "org.apache.hadoop.hive.ql.udf.generic.GenericUDF",
+ "org.apache.hadoop.hive.ql.exec.UDF"
+ )));
+
+
+ /**
+ * scans the inheritance tree
+ */
+ private static class SubTypesScanner extends AbstractScanner {
--- End diff --
Because the original one is extremely simple and makes use of their weird
Multimap-based persistence, which we don't need. Also, we want to keep track of
whether something is abstract or not (which was handled by loading the class
and using reflection before that patch).
> Improve classpath scanning to reduce the time it takes
> ------------------------------------------------------
>
> Key: DRILL-3742
> URL: https://issues.apache.org/jira/browse/DRILL-3742
> Project: Apache Drill
> Issue Type: Improvement
> Reporter: Julien Le Dem
> Fix For: Future
>
>
> Classpath scanning and the function registry take a long time (seconds every
> time).
> We want to avoid loading the classes (using bytecode inspection instead) and
> have a build-time cache to avoid doing the scanning at startup.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)