IMPALA-7203. Support UDFs in CatalogdMetaProvider

This adds support to fetch the list of function names within a database,
and then to fetch the list of overloads for a given function name. These
items are cached on the coordinator and invalidated when minimal
function objects are seen on the minimal catalog topic stream.

Aside from the straight-forward plumbing of this new RPC, it's worth
noting that this patch changes the MetaProvider interface to provide
Impala Functions directly instead of HMS Function objects. This means
that we will now fully support all the types of functions supported in
legacy catalogs. As such, test_udfs now passes.

Making this change simplified the code in LocalDb but also means that
DirectMetaProvider no longer supports fetching functions. When we move
to trying to eliminate catalogd altogether at some point, we can revive
this code. For now, I just throw an exception for the function-related
code in DirectMetaProvider, as it's unused.

The one other notable thing here is that TFunction now has optional
fields where it used to have required ones. This was necessary in order
to send invalidations as TCatalogObjects. Given that TFunction is an
internal-only construct, this shouldn't raise compatibility issues.

Change-Id: Ifef8ece9f214dca9441833b00f65c7c152d0ab53
Reviewed-on: http://gerrit.cloudera.org:8080/11359
Reviewed-by: Todd Lipcon <t...@apache.org>
Tested-by: Todd Lipcon <t...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/1c6058fa
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/1c6058fa
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/1c6058fa

Branch: refs/heads/master
Commit: 1c6058fa16fc1627799c03112ea3e0ee3c08d0e1
Parents: 0acf1b3
Author: Todd Lipcon <t...@apache.org>
Authored: Wed Aug 29 21:47:44 2018 -0700
Committer: Todd Lipcon <t...@apache.org>
Committed: Thu Sep 6 17:27:07 2018 +0000

----------------------------------------------------------------------
 common/thrift/CatalogService.thrift             |  12 +-
 common/thrift/Types.thrift                      |  32 +++-
 .../impala/catalog/CatalogServiceCatalog.java   |  24 ++-
 .../main/java/org/apache/impala/catalog/Db.java |   4 +
 .../org/apache/impala/catalog/Function.java     |   4 +
 .../catalog/local/CatalogdMetaProvider.java     | 151 +++++++++++++++----
 .../catalog/local/DirectMetaProvider.java       |  26 ++--
 .../apache/impala/catalog/local/LocalDb.java    | 141 +++--------------
 .../impala/catalog/local/MetaProvider.java      |   8 +-
 9 files changed, 232 insertions(+), 170 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/1c6058fa/common/thrift/CatalogService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/CatalogService.thrift 
b/common/thrift/CatalogService.thrift
index 26474e1..6071110 100644
--- a/common/thrift/CatalogService.thrift
+++ b/common/thrift/CatalogService.thrift
@@ -345,15 +345,19 @@ struct TPartialTableInfo {
 struct TDbInfoSelector {
   // The response should include the HMS Database object.
   1: bool want_hms_database
+
   // The response should include the list of table names in the DB.
   2: bool want_table_names
-  // TODO(todd): function names
+
+  // The response should include the list of function names in the DB.
+  3: bool want_function_names
 }
 
 // Returned information about a Database, as selected by TDbInfoSelector.
 struct TPartialDbInfo {
   1: optional hive_metastore.Database hms_database
   2: optional list<string> table_names
+  3: optional list<string> function_names
 }
 
 // RPC request for GetPartialCatalogObject.
@@ -361,7 +365,8 @@ struct TGetPartialCatalogObjectRequest {
   1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1
 
   // A catalog object descriptor: a TCatalogObject with the object name and 
type fields
-  // set.
+  // set. This may be a TABLE, DB, CATALOG, or FUNCTION. The selectors below 
can
+  // further restrict what information should be returned.
   2: required CatalogObjects.TCatalogObject object_desc
 
   3: optional TTableInfoSelector table_info_selector
@@ -379,6 +384,9 @@ struct TGetPartialCatalogObjectResponse {
   3: optional TPartialTableInfo table_info
   4: optional TPartialDbInfo db_info
   5: optional TPartialCatalogInfo catalog_info
+
+  // Functions are small enough that we return them wholesale.
+  6: optional list<Types.TFunction> functions
 }
 
 

http://git-wip-us.apache.org/repos/asf/impala/blob/1c6058fa/common/thrift/Types.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/Types.thrift b/common/thrift/Types.thrift
index 3edf42b..d1dd5a3 100644
--- a/common/thrift/Types.thrift
+++ b/common/thrift/Types.thrift
@@ -195,22 +195,40 @@ struct TAggregateFunction {
   10: optional bool ignores_distinct
 }
 
-// Represents a function in the Catalog.
+// Represents a function in the Catalog or a query plan, or may be used
+// in a minimal form in order to simply specify a function (e.g. when
+// included in a minimal catalog update or a TGetPartialCatalogInfo request).
+//
+// In the case of this latter 'specifier' use case, only the name must be
+// set.
 struct TFunction {
   // Fully qualified function name.
   1: required TFunctionName name
 
+  // -------------------------------------------------------------------------
+  // The following fields are always set, unless this TFunction is being used
+  // as a name-only "specifier".
+  // -------------------------------------------------------------------------
+
   // Type of the udf. e.g. hive, native, ir
-  2: required TFunctionBinaryType binary_type
+  2: optional TFunctionBinaryType binary_type
 
   // The types of the arguments to the function
-  3: required list<TColumnType> arg_types
+  3: optional list<TColumnType> arg_types
 
   // Return type for the function.
-  4: required TColumnType ret_type
+  4: optional TColumnType ret_type
 
   // If true, this function takes var args.
-  5: required bool has_var_args
+  5: optional bool has_var_args
+
+  // -------------------------------------------------------------------------
+  // The following fields are truly optional, even in "full" function objects.
+  //
+  // Note that TFunction objects are persisted in the user's metastore, so
+  // in many cases these fields are optional because they have been added
+  // incrementally across releases of Impala.
+  // -------------------------------------------------------------------------
 
   // Optional comment to attach to the function
   6: optional string comment
@@ -234,4 +252,8 @@ struct TFunction {
   // a no-op.
   // Not set when stored in the catalog.
   12: optional i64 last_modified_time
+
+
+  // NOTE: when adding fields to this struct, do not renumber the field IDs or
+  // add new required fields. This struct is serialized into user metastores.
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/1c6058fa/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
----------------------------------------------------------------------
diff --git 
a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java 
b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
index f02a2f3..a40d09d 100644
--- a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
@@ -54,6 +54,7 @@ import org.apache.impala.thrift.TCatalogObject;
 import org.apache.impala.thrift.TCatalogObjectType;
 import org.apache.impala.thrift.TCatalogUpdateResult;
 import org.apache.impala.thrift.TDatabase;
+import org.apache.impala.thrift.TFunction;
 import org.apache.impala.thrift.TGetCatalogUsageResponse;
 import org.apache.impala.thrift.TGetPartialCatalogObjectRequest;
 import org.apache.impala.thrift.TGetPartialCatalogObjectResponse;
@@ -540,8 +541,10 @@ public class CatalogServiceCatalog extends Catalog {
         // and this code is also under some churn at the moment. So, we'll 
just publish
         // the full information rather than doing fetch-on-demand.
         return obj;
-      case DATA_SOURCE:
       case FUNCTION:
+        min.setFn(new TFunction(obj.fn.getName()));
+        break;
+      case DATA_SOURCE:
       case HDFS_CACHE_POOL:
         // These are currently not cached by v2 impalad.
         // TODO(todd): handle these items.
@@ -2100,6 +2103,25 @@ public class CatalogServiceCatalog extends Catalog {
         table.getLock().unlock();
       }
     }
+    case FUNCTION: {
+      versionLock_.readLock().lock();
+      try {
+        Db db = getDb(objectDesc.fn.name.db_name);
+        if (db == null) {
+          throw new CatalogException(
+              "Database not found: " + objectDesc.fn.name.db_name);
+        }
+
+        List<Function> funcs = 
db.getFunctions(objectDesc.fn.name.function_name);
+        TGetPartialCatalogObjectResponse resp = new 
TGetPartialCatalogObjectResponse();
+        List<TFunction> thriftFuncs = 
Lists.newArrayListWithCapacity(funcs.size());
+        for (Function f : funcs) thriftFuncs.add(f.toThrift());
+        resp.setFunctions(thriftFuncs);
+        return resp;
+      } finally {
+        versionLock_.readLock().unlock();
+      }
+    }
     default:
       throw new CatalogException("Unable to fetch partial info for type: " +
           req.object_desc.type);

http://git-wip-us.apache.org/repos/asf/impala/blob/1c6058fa/fe/src/main/java/org/apache/impala/catalog/Db.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/Db.java 
b/fe/src/main/java/org/apache/impala/catalog/Db.java
index 7e15210..f5bc0b2 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Db.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Db.java
@@ -45,6 +45,7 @@ import org.apache.impala.util.FunctionUtils;
 import org.apache.impala.util.PatternMatcher;
 
 import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 
@@ -461,6 +462,9 @@ public class Db extends CatalogObjectImpl implements FeDb {
     if (selector.want_table_names) {
       resp.db_info.table_names = getAllTableNames();
     }
+    if (selector.want_function_names) {
+      resp.db_info.function_names = ImmutableList.copyOf(functions_.keySet());
+    }
     return resp;
   }
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/1c6058fa/fe/src/main/java/org/apache/impala/catalog/Function.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/Function.java 
b/fe/src/main/java/org/apache/impala/catalog/Function.java
index 60893a4..e0c98be 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Function.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Function.java
@@ -334,6 +334,10 @@ public class Function extends CatalogObjectImpl {
   }
 
   public static Function fromThrift(TFunction fn) {
+    Preconditions.checkArgument(fn.isSetBinary_type());
+    Preconditions.checkArgument(fn.isSetArg_types());
+    Preconditions.checkArgument(fn.isSetRet_type());
+    Preconditions.checkArgument(fn.isSetHas_var_args());
     List<Type> argTypes = Lists.newArrayList();
     for (TColumnType t: fn.getArg_types()) {
       argTypes.add(Type.fromThrift(t));

http://git-wip-us.apache.org/repos/asf/impala/blob/1c6058fa/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java
----------------------------------------------------------------------
diff --git 
a/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java 
b/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java
index 9fdcb03..6888463 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java
@@ -30,7 +30,6 @@ import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
-import org.apache.hadoop.hive.metastore.api.Function;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
@@ -40,6 +39,7 @@ import org.apache.impala.catalog.AuthorizationPolicy;
 import org.apache.impala.catalog.Catalog;
 import org.apache.impala.catalog.CatalogDeltaLog;
 import org.apache.impala.catalog.CatalogException;
+import org.apache.impala.catalog.Function;
 import org.apache.impala.catalog.Principal;
 import org.apache.impala.catalog.PrincipalPrivilege;
 import org.apache.impala.catalog.HdfsPartition.FileDescriptor;
@@ -55,6 +55,8 @@ import org.apache.impala.thrift.TCatalogObjectType;
 import org.apache.impala.thrift.TDatabase;
 import org.apache.impala.thrift.TDbInfoSelector;
 import org.apache.impala.thrift.TErrorCode;
+import org.apache.impala.thrift.TFunction;
+import org.apache.impala.thrift.TFunctionName;
 import org.apache.impala.thrift.TGetPartialCatalogObjectRequest;
 import org.apache.impala.thrift.TGetPartialCatalogObjectResponse;
 import org.apache.impala.thrift.THdfsFileDesc;
@@ -376,6 +378,19 @@ public class CatalogdMetaProvider implements MetaProvider {
     return req;
   }
 
+  private TGetPartialCatalogObjectRequest newReqForFunction(String dbName,
+      String funcName) {
+    TGetPartialCatalogObjectRequest req = new 
TGetPartialCatalogObjectRequest();
+    req.object_desc = new TCatalogObject();
+    req.object_desc.setType(TCatalogObjectType.FUNCTION);
+    req.object_desc.fn = new TFunction();
+    req.object_desc.fn.name = new TFunctionName();
+    req.object_desc.fn.name.db_name = dbName;
+    req.object_desc.fn.name.function_name = funcName;
+    return req;
+  }
+
+
   @Override
   public Database loadDb(final String dbName) throws TException {
     return loadWithCaching("database metadata for " + dbName,
@@ -713,16 +728,47 @@ public class CatalogdMetaProvider implements MetaProvider 
{
   }
 
   @Override
-  public List<String> loadFunctionNames(String dbName) throws MetaException, 
TException {
-    LOG.trace("loadFunctionNames({}): not cached yet", dbName);
-    return directProvider_.loadFunctionNames(dbName);
+  public List<String> loadFunctionNames(final String dbName) throws TException 
{
+    return loadWithCaching("function names for database " + dbName,
+        new DbCacheKey(dbName, DbCacheKey.DbInfoType.FUNCTION_NAMES),
+        new Callable<ImmutableList<String>>() {
+          @Override
+          public ImmutableList<String> call() throws Exception {
+            TGetPartialCatalogObjectRequest req = newReqForDb(dbName);
+            req.db_info_selector.want_function_names = true;
+            TGetPartialCatalogObjectResponse resp = sendRequest(req);
+            checkResponse(resp.db_info != null && resp.db_info.function_names 
!= null,
+                req, "missing expected function names");
+            return ImmutableList.copyOf(resp.db_info.function_names);
+          }
+      });
   }
 
   @Override
-  public Function getFunction(String dbName, String functionName)
-      throws MetaException, TException {
-    LOG.trace("getFunction({}, {}): not cached yet", dbName, functionName);
-    return directProvider_.getFunction(dbName, functionName);
+  public ImmutableList<Function> loadFunction(final String dbName,
+      final String functionName) throws TException {
+    ImmutableList<TFunction> thriftFuncs = loadWithCaching(
+        "function " + dbName + "." + functionName,
+        new FunctionsCacheKey(dbName, functionName),
+        new Callable<ImmutableList<TFunction>>() {
+          @Override
+          public ImmutableList<TFunction> call() throws Exception {
+            TGetPartialCatalogObjectRequest req = newReqForFunction(dbName, 
functionName);
+            TGetPartialCatalogObjectResponse resp = sendRequest(req);
+            checkResponse(resp.functions != null, req, "missing expected 
function");
+            return ImmutableList.copyOf(resp.functions);
+          }
+      });
+    // It may seem wasteful to cache the thrift function objects and then 
always
+    // convert them back to non-Thrift 'Functions' on every load. However, 
loading
+    // functions is rare enough and this ensures we don't accidentally leak 
something
+    // mutable back to the catalog layer. If this turns out to be a problem we 
can
+    // consider wrapping Function with an immutable 'FeFunction' interface.
+    ImmutableList.Builder<Function> funcs = ImmutableList.builder();
+    for (TFunction thriftFunc : thriftFuncs) {
+      funcs.add(Function.fromThrift(thriftFunc));
+    }
+    return funcs.build();
   }
 
   /**
@@ -928,17 +974,27 @@ public class CatalogdMetaProvider implements MetaProvider 
{
       // Currently adding or dropping a table doesn't send an invalidation for 
the
       // DB, so we'll be coarse-grained here and invalidate the DB table list 
when
       // any table change happens. It's relatively cheap to re-fetch this.
-      invalidateCacheForDb(obj.table.db_name, 
DbCacheKey.DbInfoType.TABLE_NAMES,
+      invalidateCacheForDb(obj.table.db_name,
+          ImmutableList.of(DbCacheKey.DbInfoType.TABLE_NAMES),
+          invalidated);
+      break;
+    case FUNCTION:
+      // Same as above: if we see a function, it might be new or deleted and 
we should
+      // refresh the list of functions in the DB to be safe.
+      invalidateCacheForDb(obj.fn.name.db_name,
+          ImmutableList.of(DbCacheKey.DbInfoType.FUNCTION_NAMES),
+          invalidated);
+      invalidateCacheForFunction(obj.fn.name.db_name, 
obj.fn.name.function_name,
           invalidated);
       break;
     case DATABASE:
       if (cache_.asMap().remove(DB_LIST_CACHE_KEY) != null) {
         invalidated.add("list of database names");
       }
-      invalidateCacheForDb(obj.db.db_name, DbCacheKey.DbInfoType.TABLE_NAMES,
-          invalidated);
-      invalidateCacheForDb(obj.db.db_name, DbCacheKey.DbInfoType.HMS_METADATA,
-          invalidated);
+      invalidateCacheForDb(obj.db.db_name, ImmutableList.of(
+          DbCacheKey.DbInfoType.TABLE_NAMES,
+          DbCacheKey.DbInfoType.HMS_METADATA,
+          DbCacheKey.DbInfoType.FUNCTION_NAMES), invalidated);
       break;
 
     default:
@@ -950,15 +1006,18 @@ public class CatalogdMetaProvider implements 
MetaProvider {
   }
 
   /**
-   * Invalidate cached metadata of the given type for the given database. If 
anything
+   * Invalidate cached metadata of the given types for the given database. If 
anything
    * was invalidated, adds a human-readable string to 'invalidated' indicating 
the
    * invalidated metadata.
    */
-  private void invalidateCacheForDb(String dbName, DbCacheKey.DbInfoType type,
+  private void invalidateCacheForDb(String dbName, 
Iterable<DbCacheKey.DbInfoType> types,
       List<String> invalidated) {
-    DbCacheKey key = new DbCacheKey(dbName, type);
-    if (cache_.asMap().remove(key) != null) {
-      invalidated.add(type + " for DB " + dbName);
+    // TODO(todd) check whether we need to lower-case/canonicalize dbName?
+    for (DbCacheKey.DbInfoType type: types) {
+      DbCacheKey key = new DbCacheKey(dbName, type);
+      if (cache_.asMap().remove(key) != null) {
+        invalidated.add(type + " for DB " + dbName);
+      }
     }
   }
 
@@ -968,6 +1027,7 @@ public class CatalogdMetaProvider implements MetaProvider {
    */
   private void invalidateCacheForTable(String dbName, String tblName,
       List<String> invalidated) {
+    // TODO(todd) check whether we need to lower-case/canonicalize dbName and 
tblName?
     TableCacheKey key = new TableCacheKey(dbName, tblName);
     if (cache_.asMap().remove(key) != null) {
       invalidated.add("table " + dbName + "." + tblName);
@@ -975,6 +1035,19 @@ public class CatalogdMetaProvider implements MetaProvider 
{
   }
 
   /**
+   * Invalidate cached metadata for the given function. If anything was 
invalidated, adds
+   * a human-readable string to 'invalidated' indicating the invalidated 
metadata.
+   */
+  private void invalidateCacheForFunction(String dbName, String functionName,
+      List<String> invalidated) {
+    // TODO(todd) check whether we need to lower-case/canonicalize names?
+    FunctionsCacheKey key = new FunctionsCacheKey(dbName, functionName);
+    if (cache_.asMap().remove(key) != null) {
+      invalidated.add("function " + dbName + "." + functionName);
+    }
+  }
+
+  /**
    * Reference to a partition within a table. We remember the partition's ID 
and pass
    * that back to the catalog in subsequent requests back to fetch the details 
of the
    * partition, since the ID is smaller than the name and provides a unique 
(not-reused)
@@ -1083,34 +1156,54 @@ public class CatalogdMetaProvider implements 
MetaProvider {
   }
 
   /**
-   * Base class for cache keys related to a specific table. Such keys are 
explicitly
-   * invalidated by 'invalidateCacheForTable' above.
+   * Base class for cache key for a named item within a database (eg table or 
function).
    */
-  private static class TableCacheKey {
+  private static class DbChildCacheKey {
     final String dbName_;
-    final String tableName_;
+    final String childName_;
 
-    TableCacheKey(String dbName, String tableName) {
+    protected DbChildCacheKey(String dbName, String childName) {
       this.dbName_ = dbName;
-      this.tableName_ = tableName;
+      this.childName_ = childName;
     }
 
     @Override
     public int hashCode() {
-      return Objects.hashCode(dbName_, tableName_, getClass());
+      return Objects.hashCode(dbName_, childName_, getClass());
     }
     @Override
     public boolean equals(Object obj) {
       if (obj == null || !obj.getClass().equals(getClass())) {
         return false;
       }
-      TableCacheKey other = (TableCacheKey)obj;
-      return tableName_.equals(other.tableName_) &&
+      DbChildCacheKey other = (DbChildCacheKey)obj;
+      return childName_.equals(other.childName_) &&
           dbName_.equals(other.dbName_);
     }
   }
 
   /**
+   * Base class for cache keys related to a specific table. Such keys are 
explicitly
+   * invalidated by 'invalidateCacheForTable' above.
+   */
+  private static class TableCacheKey extends DbChildCacheKey {
+    TableCacheKey(String dbName, String tableName) {
+      super(dbName, tableName);
+    }
+  }
+
+  /**
+   * Cache key for a the set of overloads of a function given a name.
+   * Invalidated by 'invalidateCacheForFunction' above.
+   * Values are ImmutableList<TFunction>.
+   */
+  private static class FunctionsCacheKey extends DbChildCacheKey {
+    FunctionsCacheKey(String dbName, String funcName) {
+      super(dbName, funcName);
+    }
+  }
+
+  /**
    * Base class for cache keys that are tied to a particular version of a 
table.
    * These keys are never explicitly invalidated. Instead, we rely on the fact 
that,
    * when the table is updated, it has a new version number. This results in 
making
@@ -1226,7 +1319,9 @@ public class CatalogdMetaProvider implements MetaProvider 
{
       /** Cache the HMS Database object */
       HMS_METADATA,
       /** Cache an ImmutableList<String> for table names within the DB */
-      TABLE_NAMES
+      TABLE_NAMES,
+      /** Cache an ImmutableList<String> for function names within the DB */
+      FUNCTION_NAMES
     }
     private final String dbName_;
     private final DbInfoType type_;

http://git-wip-us.apache.org/repos/asf/impala/blob/1c6058fa/fe/src/main/java/org/apache/impala/catalog/local/DirectMetaProvider.java
----------------------------------------------------------------------
diff --git 
a/fe/src/main/java/org/apache/impala/catalog/local/DirectMetaProvider.java 
b/fe/src/main/java/org/apache/impala/catalog/local/DirectMetaProvider.java
index 6f13755..edba217 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/DirectMetaProvider.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/DirectMetaProvider.java
@@ -33,13 +33,13 @@ import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
-import org.apache.hadoop.hive.metastore.api.Function;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.UnknownDBException;
 import org.apache.impala.catalog.AuthorizationPolicy;
+import org.apache.impala.catalog.Function;
 import org.apache.impala.catalog.HdfsPartition.FileDescriptor;
 import org.apache.impala.catalog.HdfsTable;
 import org.apache.impala.catalog.MetaStoreClientPool;
@@ -258,22 +258,22 @@ class DirectMetaProvider implements MetaProvider {
 
   @Override
   public List<String> loadFunctionNames(String dbName) throws TException {
-    Preconditions.checkNotNull(dbName);
-    try (MetaStoreClient c = msClientPool_.getClient()) {
-      return ImmutableList.copyOf(c.getHiveClient().getFunctions(
-          dbName, /*pattern=*/null));
-    }
+    // NOTE: this is a bit tricky to implement since functions may be stored as
+    // HMS functions or serialized functions in the DB parameters themselves. 
We
+    // need to do some refactoring to support this in DirectMetaProvider. Some 
code
+    // used to exist to do this in LocalDb -- look back in git history if you 
want to
+    // revive the usefulness of DirectMetaProvider.
+    throw new UnsupportedOperationException(
+        "Functions not supported by DirectMetaProvider");
   }
 
 
   @Override
-  public Function getFunction(String dbName, String functionName) throws 
TException {
-    Preconditions.checkNotNull(dbName);
-    Preconditions.checkNotNull(functionName);
-
-    try (MetaStoreClient c = msClientPool_.getClient()) {
-      return c.getHiveClient().getFunction(dbName, functionName);
-    }
+  public ImmutableList<Function> loadFunction(String dbName, String 
functionName)
+      throws TException {
+    // See above.
+    throw new UnsupportedOperationException(
+        "Functions not supported by DirectMetaProvider");
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/impala/blob/1c6058fa/fe/src/main/java/org/apache/impala/catalog/local/LocalDb.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/LocalDb.java 
b/fe/src/main/java/org/apache/impala/catalog/local/LocalDb.java
index f71d206..c7f7116 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/LocalDb.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalDb.java
@@ -18,7 +18,6 @@
 package org.apache.impala.catalog.local;
 
 import java.util.Collections;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
@@ -33,8 +32,6 @@ import org.apache.impala.catalog.FeKuduTable;
 import org.apache.impala.catalog.FeTable;
 import org.apache.impala.catalog.Function;
 import org.apache.impala.catalog.Function.CompareMode;
-import org.apache.impala.common.ImpalaRuntimeException;
-import org.apache.impala.service.BackendConfig;
 import org.apache.impala.thrift.TDatabase;
 import org.apache.impala.thrift.TFunctionCategory;
 import org.apache.impala.util.FunctionUtils;
@@ -73,7 +70,7 @@ class LocalDb implements FeDb {
   /**
    * Map of function name to list of signatures for that function name.
    */
-  private Map<String, FunctionOverloads> functions_;
+  private Map<String, List<Function>> functions_;
 
   public LocalDb(LocalCatalog catalog, String dbName) {
     Preconditions.checkNotNull(catalog);
@@ -173,53 +170,29 @@ class LocalDb implements FeDb {
   @Override
   public Function getFunction(Function desc, CompareMode mode) {
     loadFunction(desc.functionName());
-    FunctionOverloads funcs = functions_.get(desc.functionName());
+    List<Function> funcs = functions_.get(desc.functionName());
     if (funcs == null) return null;
     return FunctionUtils.resolveFunction(funcs, desc, mode);
   }
 
   /**
-   * Populate the 'functions_' map if not already populated.
-   * This handles loading persistent native functions, since they're already
-   * present in the DB-level metadata, and loading the list of Java function
-   * names. The Java function signatures themselves are lazy-loaded as 
necessary
-   * by loadFunction(...).
+   * Populate the 'functions_' map with the correct set of keys corresponding 
to
+   * the functions in this database. The values will be 'null' to indicate 
that the
+   * functions themselves have not yet been loaded.
    */
   private void loadFunctionNames() {
     if (functions_ != null) return;
 
-    // Load the Java functions names. We don't load the actual metadata
-    // for them unless they get looked up.
-    List<String> javaFuncNames;
-
+    List<String> funcNames;
     try {
-      javaFuncNames = catalog_.getMetaProvider().loadFunctionNames(name_);
+      funcNames = catalog_.getMetaProvider().loadFunctionNames(name_);
     } catch (TException e) {
       throw new LocalCatalogException(String.format(
-          "Could not load functions for database '%s' from HMS", name_), e);
-    }
-
-    Map<String, FunctionOverloads> newMap = Maps.newHashMap();
-    for (String fn : javaFuncNames) {
-      newMap.put(fn, new FunctionOverloads(/*javaNeedsLoad=*/true));
-    }
-
-    // Load native functions.
-    List<Function> nativeFuncs = 
FunctionUtils.deserializeNativeFunctionsFromDbParams(
-        getMetaStoreDb().getParameters());
-    for (Function fn : nativeFuncs) {
-      String fnName = fn.functionName();
-      FunctionOverloads dst = newMap.get(fnName);
-      if (dst == null) {
-        // We know there were no Java functions by this name since we didn't 
see
-        // this function above in the HMS-derived function list.
-        dst = new FunctionOverloads(/*javaNeedsLoad=*/false);
-        newMap.put(fnName, dst);
-      }
-      dst.add(fn);
+          "Could not load function names for database '%s'", name_), e);
     }
 
-    functions_ = newMap;
+    functions_ = Maps.newHashMapWithExpectedSize(funcNames.size());
+    for (String fn : funcNames) functions_.put(fn, null);
   }
 
   /**
@@ -229,38 +202,29 @@ class LocalDb implements FeDb {
   private void loadFunction(String functionName) {
     loadFunctionNames();
 
-    FunctionOverloads overloads = functions_.get(functionName);
-    // If the function isn't in the map at all, then it doesn't exist.
-    if (overloads == null) return;
 
-    // If it's in the map, the native functions will already have been loaded,
-    // since we get that info from the DB params. But, we may still need to
-    // load Java info.
-    if (!overloads.javaNeedsLoad()) return;
+    // If the function isn't in the map at all, then the function doesn't 
exist.
+    if (!functions_.containsKey(functionName)) return;
+    List<Function> overloads = functions_.get(functionName);
+
+    // If it's in the map, it might have a null value, or it might already be 
loaded.
+    // If it's already loaded, we're done.
+    if (overloads != null) return;
 
-    org.apache.hadoop.hive.metastore.api.Function msFunc;
-    try {
-      msFunc = catalog_.getMetaProvider().getFunction(name_, functionName);
-    } catch (TException e) {
-      throw new LocalCatalogException(String.format(
-          "Could not load function '%s.%s' from HMS",
-          name_, functionName), e);
-    }
 
     try {
-      overloads.setJavaFunctions(FunctionUtils.extractFunctions(name_, msFunc,
-          BackendConfig.INSTANCE.getBackendCfg().local_library_path));
-    } catch (ImpalaRuntimeException e) {
-      throw new LocalCatalogException(String.format(
-          "Could not load Java function definitions for '%s.%s'",
+      overloads = catalog_.getMetaProvider().loadFunction(name_, functionName);
+    } catch (TException e) {
+      throw new LocalCatalogException(String.format("Could not load function 
'%s.%s'",
           name_, functionName), e);
     }
+    functions_.put(functionName,  overloads);
   }
 
   @Override
   public List<Function> getFunctions(String functionName) {
     loadFunction(functionName);
-    FunctionOverloads funcs = functions_.get(functionName);
+    List<Function> funcs = functions_.get(functionName);
     if (funcs == null) return Collections.emptyList();
     return FunctionUtils.getVisibleFunctions(funcs);
   }
@@ -269,7 +233,7 @@ class LocalDb implements FeDb {
   public List<Function> getFunctions(
       TFunctionCategory category, String functionName) {
     loadFunction(functionName);
-    FunctionOverloads funcs = functions_.get(functionName);
+    List<Function> funcs = functions_.get(functionName);
     if (funcs == null) return Collections.emptyList();
     return FunctionUtils.getVisibleFunctionsInCategory(funcs, category);
   }
@@ -309,63 +273,4 @@ class LocalDb implements FeDb {
   LocalCatalog getCatalog() {
     return catalog_;
   }
-
-  /**
-   * Captures the set of function overloads with a given name. This tracks the
-   * lazy-loading state of whether the Java signatures have been loaded yet,
-   * and ensures that only a properly-sorted list is exposed.
-   */
-  private static class FunctionOverloads implements Iterable<Function> {
-    /**
-     * The loaded functions, or null if no functions have not been loaded.
-     */
-    private List<Function> functions_;
-
-    /**
-     * The function list is lazily sorted only if it gets iterated, so that
-     * we don't pay any cost for sorting in the case when function names are
-     * loaded but a given function is not actually resolved in a query.
-     */
-    private boolean needsSort_ = false;
-
-    /**
-     * Whether Java functions still need to be loaded.
-     */
-    private boolean javaNeedsLoad_ = true;
-
-    FunctionOverloads(boolean javaNeedsLoad) {
-      this.javaNeedsLoad_ = javaNeedsLoad;
-    }
-
-    @Override
-    public Iterator<Function> iterator() {
-      Preconditions.checkState(!javaNeedsLoad_);
-      if (needsSort_) {
-        Collections.sort(functions_, FunctionUtils.FUNCTION_RESOLUTION_ORDER);
-        needsSort_ = false;
-      }
-      return functions_.iterator();
-    }
-
-    public void add(Function fn) {
-      if (functions_ == null) functions_ = Lists.newArrayList();
-      functions_.add(fn);
-      needsSort_ = true;
-    }
-
-    public boolean javaNeedsLoad() {
-      return javaNeedsLoad_;
-    }
-
-    public void setJavaFunctions(List<Function> fns) {
-      Preconditions.checkState(javaNeedsLoad_);
-      javaNeedsLoad_ = false;
-      needsSort_ |= !fns.isEmpty();
-      if (functions_ == null) {
-        functions_ = fns;
-        return;
-      }
-      functions_.addAll(fns);
-    }
-  }
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/1c6058fa/fe/src/main/java/org/apache/impala/catalog/local/MetaProvider.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/MetaProvider.java 
b/fe/src/main/java/org/apache/impala/catalog/local/MetaProvider.java
index 5431e30..60739d3 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/MetaProvider.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/MetaProvider.java
@@ -22,13 +22,13 @@ import java.util.Map;
 
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
-import org.apache.hadoop.hive.metastore.api.Function;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.UnknownDBException;
 import org.apache.impala.catalog.AuthorizationPolicy;
+import org.apache.impala.catalog.Function;
 import org.apache.impala.catalog.HdfsPartition.FileDescriptor;
 import org.apache.impala.common.Pair;
 import org.apache.impala.thrift.TNetworkAddress;
@@ -84,9 +84,11 @@ interface MetaProvider {
   List<String> loadFunctionNames(String dbName) throws TException;
 
   /**
-   * Retrieve the specified function from the metadata store.
+   * Retrieve the specified function from the metadata store. A function may 
have
+   * many overloads with the same name.
    */
-  Function getFunction(String dbName, String functionName) throws TException;
+  ImmutableList<Function> loadFunction(String dbName, String functionName)
+      throws TException;
 
   /**
    * Load the given partitions from the specified table.

Reply via email to