[ 
https://issues.apache.org/jira/browse/TAJO-1686?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15186585#comment-15186585
 ] 

ASF GitHub Bot commented on TAJO-1686:
--------------------------------------

Github user jihoonson commented on a diff in the pull request:

    https://github.com/apache/tajo/pull/929#discussion_r55475465
  
    --- Diff: 
tajo-core/src/main/java/org/apache/tajo/engine/function/hiveudf/HiveFunctionLoader.java
 ---
    @@ -0,0 +1,161 @@
    +/***
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.tajo.engine.function.hiveudf;
    +
    +import org.apache.hadoop.fs.FileStatus;
    +import org.apache.hadoop.fs.FileSystem;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.hadoop.hive.ql.exec.Description;
    +import org.apache.hadoop.hive.ql.exec.UDF;
    +import org.apache.hadoop.hive.ql.udf.UDFType;
    +import org.apache.hadoop.io.Writable;
    +import org.apache.tajo.catalog.FunctionDesc;
    +import org.apache.tajo.catalog.FunctionDescBuilder;
    +import org.apache.tajo.catalog.proto.CatalogProtos;
    +import org.apache.tajo.common.TajoDataTypes;
    +import org.apache.tajo.conf.TajoConf;
    +import org.apache.tajo.exception.TajoInternalError;
    +import org.apache.tajo.function.UDFInvocationDesc;
    +import org.apache.tajo.util.WritableTypeConverter;
    +import org.reflections.Reflections;
    +import org.reflections.util.ConfigurationBuilder;
    +
    +import java.io.IOException;
    +import java.lang.reflect.Method;
    +import java.net.URL;
    +import java.net.URLClassLoader;
    +import java.util.*;
    +
    +public class HiveFunctionLoader {
    +  public static Optional<List<FunctionDesc>> loadHiveUDFs(TajoConf conf) {
    +    ArrayList<FunctionDesc> funcList = new ArrayList<>();
    +    String udfdir = conf.getVar(TajoConf.ConfVars.HIVE_UDF_DIR);
    +
    +    try {
    +      Path udfPath = new Path(udfdir);
    +      FileSystem fs = udfPath.getFileSystem(conf);
    +
    +      if (!fs.isDirectory(udfPath)) {
    +        return Optional.empty();
    +      }
    +
    +      // loop each jar file
    +      for (FileStatus fstatus : fs.listStatus(udfPath, (Path path) -> 
path.getName().endsWith(".jar"))) {
    +
    +        URL[] urls = new URL[]{new URL("jar:" + 
fstatus.getPath().toUri().toURL() + "!/")};
    +
    +        // extract and register UDF's decendants (legacy Hive UDF form)
    +        Set<Class<? extends UDF>> udfClasses = 
getSubclassesFromJarEntry(urls, UDF.class);
    +        if (udfClasses != null) {
    +          buildFunctionsFromUDF(udfClasses, funcList, 
"jar:"+urls[0].getPath());
    +        }
    +      }
    +    } catch (IOException e) {
    +      throw new TajoInternalError(e);
    +    }
    +
    +    return Optional.of(funcList);
    +  }
    +
    +  private static <T> Set<Class<? extends T>> 
getSubclassesFromJarEntry(URL[] urls, Class<T> targetCls) {
    +    Reflections refl = new Reflections(new ConfigurationBuilder().
    +        setUrls(urls).
    +        addClassLoader(new URLClassLoader(urls)));
    +
    +    return refl.getSubTypesOf(targetCls);
    +  }
    +
    +  static void buildFunctionsFromUDF(Set<Class<? extends UDF>> classes, 
List<FunctionDesc> list, String jarurl) {
    +    for (Class<? extends UDF> clazz: classes) {
    +      String [] names;
    +      String value = null, extended = null;
    +
    +      Description desc = clazz.getAnnotation(Description.class);
    +
    +      // Check @Description annotation (if exists)
    +      if (desc != null) {
    +        names = desc.name().split(",");
    +        for (int i=0; i<names.length; i++) {
    +          names[i] = names[i].trim();
    +        }
    +
    +        value = desc.value();
    +        extended = desc.extended();
    +      }
    +      else {
    +        names = new String [] {clazz.getName().replace('.','_')};
    --- End diff --
    
    clazz.getName() returns the fully qualified name of the UDF class, so this 
line will produce a very long function name. How about transforming the simple 
name of the class from camel case to snake case instead?


> Allow Tajo to use Hive UDF
> --------------------------
>
>                 Key: TAJO-1686
>                 URL: https://issues.apache.org/jira/browse/TAJO-1686
>             Project: Tajo
>          Issue Type: New Feature
>          Components: Function/UDF
>            Reporter: Jaehwa Jung
>            Assignee: Jongyoung Park
>
> Hive has been widely used in this area. Many users maintain lots of 
> big tables through the Hive metastore using HiveQL and UDFs. Currently, Tajo 
> provides its own UDF API, and Hive users can implement their UDFs in Tajo. 
> But if we can wrap Hive UDFs in Tajo, it seems that they would be able to 
> use Tajo more easily for their analysis infrastructure.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to