[
https://issues.apache.org/jira/browse/TAJO-1686?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15205731#comment-15205731
]
ASF GitHub Bot commented on TAJO-1686:
--------------------------------------
Github user eminency commented on a diff in the pull request:
https://github.com/apache/tajo/pull/929#discussion_r56934161
--- Diff:
tajo-core/src/main/java/org/apache/tajo/engine/function/hiveudf/HiveFunctionLoader.java
---
@@ -0,0 +1,161 @@
+/***
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.hiveudf;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.io.Writable;
+import org.apache.tajo.catalog.FunctionDesc;
+import org.apache.tajo.catalog.FunctionDescBuilder;
+import org.apache.tajo.catalog.proto.CatalogProtos;
+import org.apache.tajo.common.TajoDataTypes;
+import org.apache.tajo.conf.TajoConf;
+import org.apache.tajo.exception.TajoInternalError;
+import org.apache.tajo.function.UDFInvocationDesc;
+import org.apache.tajo.util.WritableTypeConverter;
+import org.reflections.Reflections;
+import org.reflections.util.ConfigurationBuilder;
+
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.util.*;
+
+public class HiveFunctionLoader {
+ public static Optional<List<FunctionDesc>> loadHiveUDFs(TajoConf conf) {
+ ArrayList<FunctionDesc> funcList = new ArrayList<>();
+ String udfdir = conf.getVar(TajoConf.ConfVars.HIVE_UDF_DIR);
+
+ try {
+ Path udfPath = new Path(udfdir);
+ FileSystem fs = udfPath.getFileSystem(conf);
+
+ if (!fs.isDirectory(udfPath)) {
+ return Optional.empty();
+ }
+
+ // loop each jar file
+ for (FileStatus fstatus : fs.listStatus(udfPath, (Path path) ->
path.getName().endsWith(".jar"))) {
+
+ URL[] urls = new URL[]{new URL("jar:" +
fstatus.getPath().toUri().toURL() + "!/")};
+
+ // extract and register UDF's descendants (legacy Hive UDF form)
+ Set<Class<? extends UDF>> udfClasses =
getSubclassesFromJarEntry(urls, UDF.class);
+ if (udfClasses != null) {
+ buildFunctionsFromUDF(udfClasses, funcList,
"jar:"+urls[0].getPath());
+ }
+ }
+ } catch (IOException e) {
+ throw new TajoInternalError(e);
+ }
+
+ return Optional.of(funcList);
+ }
+
+ private static <T> Set<Class<? extends T>>
getSubclassesFromJarEntry(URL[] urls, Class<T> targetCls) {
+ Reflections refl = new Reflections(new ConfigurationBuilder().
+ setUrls(urls).
+ addClassLoader(new URLClassLoader(urls)));
+
+ return refl.getSubTypesOf(targetCls);
+ }
+
+ static void buildFunctionsFromUDF(Set<Class<? extends UDF>> classes,
List<FunctionDesc> list, String jarurl) {
+ for (Class<? extends UDF> clazz: classes) {
+ String [] names;
+ String value = null, extended = null;
+
+ Description desc = clazz.getAnnotation(Description.class);
+
+ // Check @Description annotation (if exists)
+ if (desc != null) {
+ names = desc.name().split(",");
+ for (int i=0; i<names.length; i++) {
+ names[i] = names[i].trim();
+ }
+
+ value = desc.value();
+ extended = desc.extended();
+ }
+ else {
+ names = new String [] {clazz.getName().replace('.','_')};
--- End diff --
This part is from the Hive code, and I guess it is there to avoid name conflicts.
Keeping it is not bad in my opinion, because it can force users to use the
annotation.
> Allow Tajo to use Hive UDF
> --------------------------
>
> Key: TAJO-1686
> URL: https://issues.apache.org/jira/browse/TAJO-1686
> Project: Tajo
> Issue Type: New Feature
> Components: Function/UDF
> Reporter: Jaehwa Jung
> Assignee: Jongyoung Park
>
> Hive has been widely used in this area. Many users have maintained lots of
> big tables through the Hive metastore using HiveQL and UDFs. Currently, Tajo
> provides its own UDF mechanism, and Hive users can implement their UDFs in Tajo.
> But if we can wrap Hive UDFs in Tajo, it seems that they would be able to use
> Tajo easily for their analysis infrastructure.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)