http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/DatabaseNotFoundException.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/DatabaseNotFoundException.java b/fe/src/main/java/com/cloudera/impala/catalog/DatabaseNotFoundException.java deleted file mode 100644 index 8affb11..0000000 --- a/fe/src/main/java/com/cloudera/impala/catalog/DatabaseNotFoundException.java +++ /dev/null @@ -1,29 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.catalog; - - -/** - * Thrown when a database cannot be found in the catalog. - */ -public class DatabaseNotFoundException extends CatalogException { - // Dummy serial ID to satisfy Eclipse - private static final long serialVersionUID = -2203080667446640542L; - - public DatabaseNotFoundException(String s) { super(s); } -} \ No newline at end of file
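For context on the file removed above: per its own Javadoc, DatabaseNotFoundException is the checked exception thrown when a database cannot be found in the catalog, and it extends CatalogException. Below is a minimal, self-contained sketch of the lookup-or-throw pattern such an exception supports. It is illustrative only: the CatalogSketch class, getDbOrThrow method, and the stub Catalog/Db types are stand-ins invented for this example and are not Impala's actual Catalog API.

```java
// Minimal sketch (not Impala source): shows the typical "look up a database,
// throw a descriptive exception if it does not exist" pattern that a
// DatabaseNotFoundException-style class enables. All names below are
// illustrative stand-ins.
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

class CatalogException extends Exception {
  CatalogException(String msg) { super(msg); }
}

class DatabaseNotFoundException extends CatalogException {
  DatabaseNotFoundException(String msg) { super(msg); }
}

class Db {
  private final String name;
  Db(String name) { this.name = name; }
  String getName() { return name; }
}

public class CatalogSketch {
  // Databases keyed by lower-cased name, mirroring case-insensitive db names.
  private final Map<String, Db> dbs = new ConcurrentHashMap<>();

  void addDb(Db db) { dbs.put(db.getName().toLowerCase(), db); }

  // Lookup that throws instead of returning null, so callers can surface a
  // clear "database does not exist" error to the user.
  Db getDbOrThrow(String dbName) throws DatabaseNotFoundException {
    Db db = dbs.get(dbName.toLowerCase());
    if (db == null) {
      throw new DatabaseNotFoundException("Database not found: " + dbName);
    }
    return db;
  }

  public static void main(String[] args) {
    CatalogSketch catalog = new CatalogSketch();
    catalog.addDb(new Db("functional"));
    try {
      catalog.getDbOrThrow("missing_db");
    } catch (DatabaseNotFoundException e) {
      System.out.println(e.getMessage()); // prints: Database not found: missing_db
    }
  }
}
```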
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/Db.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/Db.java b/fe/src/main/java/com/cloudera/impala/catalog/Db.java deleted file mode 100644 index a9150fe..0000000 --- a/fe/src/main/java/com/cloudera/impala/catalog/Db.java +++ /dev/null @@ -1,495 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.catalog; - -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.commons.codec.binary.Base64; -import org.apache.thrift.protocol.TCompactProtocol; -import org.apache.thrift.TException; -import org.apache.thrift.TSerializer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.cloudera.impala.catalog.Function; -import com.cloudera.impala.common.ImpalaException; -import com.cloudera.impala.common.ImpalaRuntimeException; -import com.cloudera.impala.common.JniUtil; -import com.cloudera.impala.thrift.TCatalogObjectType; -import com.cloudera.impala.thrift.TDatabase; -import com.cloudera.impala.thrift.TFunction; -import com.cloudera.impala.thrift.TFunctionBinaryType; -import com.cloudera.impala.thrift.TFunctionCategory; -import com.cloudera.impala.util.PatternMatcher; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - -/** - * Internal representation of db-related metadata. Owned by Catalog instance. - * Not thread safe. - * - * Tables are stored in a map from the table name to the table object. They may - * be loaded 'eagerly' at construction or 'lazily' on first reference. - * Tables are accessed via getTable which may trigger a metadata read in two cases: - * * if the table has never been loaded - * * if the table loading failed on the previous attempt - * - * Native user added functions are persisted to the parameters map of the hive metastore - * db object corresponding to this instance. This map's key is the function signature and - * value is the base64 representation of the thrift serialized function object. - * - */ -public class Db implements CatalogObject { - private static final Logger LOG = LoggerFactory.getLogger(Db.class); - private final Catalog parentCatalog_; - private final TDatabase thriftDb_; - private long catalogVersion_ = Catalog.INITIAL_CATALOG_VERSION; - - public static final String FUNCTION_INDEX_PREFIX = "impala_registered_function_"; - - // Hive metastore imposes a limit of 4000 bytes on the key and value strings - // in DB parameters map. 
We need ensure that this limit isn't crossed - // while serializing functions to the metastore. - private static final int HIVE_METASTORE_DB_PARAM_LIMIT_BYTES = 4000; - - // Table metadata cache. - private final CatalogObjectCache<Table> tableCache_; - - // All of the registered user functions. The key is the user facing name (e.g. "myUdf"), - // and the values are all the overloaded variants (e.g. myUdf(double), myUdf(string)) - // This includes both UDFs and UDAs. Updates are made thread safe by synchronizing - // on this map. When a new Db object is initialized, this list is updated with the - // UDF/UDAs already persisted, if any, in the metastore DB. Functions are sorted in a - // canonical order defined by FunctionResolutionOrder. - private final HashMap<String, List<Function>> functions_; - - // If true, this database is an Impala system database. - // (e.g. can't drop it, can't add tables to it, etc). - private boolean isSystemDb_ = false; - - public Db(String name, Catalog catalog, - org.apache.hadoop.hive.metastore.api.Database msDb) { - thriftDb_ = new TDatabase(name.toLowerCase()); - parentCatalog_ = catalog; - thriftDb_.setMetastore_db(msDb); - tableCache_ = new CatalogObjectCache<Table>(); - functions_ = new HashMap<String, List<Function>>(); - } - - public void setIsSystemDb(boolean b) { isSystemDb_ = b; } - - /** - * Creates a Db object with no tables based on the given TDatabase thrift struct. - */ - public static Db fromTDatabase(TDatabase db, Catalog parentCatalog) { - return new Db(db.getDb_name(), parentCatalog, db.getMetastore_db()); - } - - /** - * Updates the hms parameters map by adding the input <k,v> pair. - */ - private void putToHmsParameters(String k, String v) { - org.apache.hadoop.hive.metastore.api.Database msDb = thriftDb_.metastore_db; - Preconditions.checkNotNull(msDb); - Map<String, String> hmsParams = msDb.getParameters(); - if (hmsParams == null) hmsParams = Maps.newHashMap(); - hmsParams.put(k,v); - msDb.setParameters(hmsParams); - } - - /** - * Updates the hms parameters map by removing the <k,v> pair corresponding to - * input key <k>. Returns true if the parameters map contains a pair <k,v> - * corresponding to input k and it is removed, false otherwise. - */ - private boolean removeFromHmsParameters(String k) { - org.apache.hadoop.hive.metastore.api.Database msDb = thriftDb_.metastore_db; - Preconditions.checkNotNull(msDb); - if (msDb.getParameters() == null) return false; - return msDb.getParameters().remove(k) != null; - } - - public boolean isSystemDb() { return isSystemDb_; } - public TDatabase toThrift() { return thriftDb_; } - @Override - public String getName() { return thriftDb_.getDb_name(); } - @Override - public TCatalogObjectType getCatalogObjectType() { - return TCatalogObjectType.DATABASE; - } - - /** - * Adds a table to the table cache. - */ - public void addTable(Table table) { - tableCache_.add(table); - } - - /** - * Gets all table names in the table cache. - */ - public List<String> getAllTableNames() { - return Lists.newArrayList(tableCache_.keySet()); - } - - public boolean containsTable(String tableName) { - return tableCache_.contains(tableName.toLowerCase()); - } - - /** - * Returns the Table with the given name if present in the table cache or null if the - * table does not exist in the cache. - */ - public Table getTable(String tblName) { - return tableCache_.get(tblName); - } - - /** - * Removes the table name and any cached metadata from the Table cache. 
- */ - public Table removeTable(String tableName) { - return tableCache_.remove(tableName.toLowerCase()); - } - - /** - * Comparator that sorts function overloads. We want overloads to be always considered - * in a canonical order so that overload resolution in the case of multiple valid - * overloads does not depend on the order in which functions are added to the Db. The - * order is based on the PrimitiveType enum because this was the order used implicitly - * for builtin operators and functions in earlier versions of Impala. - */ - private static class FunctionResolutionOrder implements Comparator<Function> { - @Override - public int compare(Function f1, Function f2) { - int numSharedArgs = Math.min(f1.getNumArgs(), f2.getNumArgs()); - for (int i = 0; i < numSharedArgs; ++i) { - int cmp = typeCompare(f1.getArgs()[i], f2.getArgs()[i]); - if (cmp < 0) { - return -1; - } else if (cmp > 0) { - return 1; - } - } - // Put alternative with fewer args first. - if (f1.getNumArgs() < f2.getNumArgs()) { - return -1; - } else if (f1.getNumArgs() > f2.getNumArgs()) { - return 1; - } - return 0; - } - - private int typeCompare(Type t1, Type t2) { - Preconditions.checkState(!t1.isComplexType()); - Preconditions.checkState(!t2.isComplexType()); - return Integer.compare(t1.getPrimitiveType().ordinal(), - t2.getPrimitiveType().ordinal()); - } - } - - private static final FunctionResolutionOrder FUNCTION_RESOLUTION_ORDER = - new FunctionResolutionOrder(); - - /** - * Returns the metastore.api.Database object this Database was created from. - * Returns null if it is not related to a hive database such as builtins_db. - */ - public org.apache.hadoop.hive.metastore.api.Database getMetaStoreDb() { - return thriftDb_.getMetastore_db(); - } - - /** - * Returns the number of functions in this database. - */ - public int numFunctions() { - synchronized (functions_) { - return functions_.size(); - } - } - - /** - * See comment in Catalog. - */ - public boolean containsFunction(String name) { - synchronized (functions_) { - return functions_.get(name) != null; - } - } - - /* - * See comment in Catalog. - */ - public Function getFunction(Function desc, Function.CompareMode mode) { - synchronized (functions_) { - List<Function> fns = functions_.get(desc.functionName()); - if (fns == null) return null; - - // First check for identical - for (Function f: fns) { - if (f.compare(desc, Function.CompareMode.IS_IDENTICAL)) return f; - } - if (mode == Function.CompareMode.IS_IDENTICAL) return null; - - // Next check for indistinguishable - for (Function f: fns) { - if (f.compare(desc, Function.CompareMode.IS_INDISTINGUISHABLE)) return f; - } - if (mode == Function.CompareMode.IS_INDISTINGUISHABLE) return null; - - // Next check for strict supertypes - for (Function f: fns) { - if (f.compare(desc, Function.CompareMode.IS_SUPERTYPE_OF)) return f; - } - if (mode == Function.CompareMode.IS_SUPERTYPE_OF) return null; - - // Finally check for non-strict supertypes - for (Function f: fns) { - if (f.compare(desc, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF)) return f; - } - } - return null; - } - - public Function getFunction(String signatureString) { - synchronized (functions_) { - for (List<Function> fns: functions_.values()) { - for (Function f: fns) { - if (f.signatureString().equals(signatureString)) return f; - } - } - } - return null; - } - - /** - * Adds the user defined function fn to metastore DB params. 
fn is - * serialized to thrift using TBinaryProtocol and then base64-encoded - * to be compatible with the HMS' representation of params. - */ - private boolean addFunctionToDbParams(Function fn) { - Preconditions.checkState( - fn.getBinaryType() != TFunctionBinaryType.BUILTIN && - fn.getBinaryType() != TFunctionBinaryType.JAVA); - try { - TSerializer serializer = - new TSerializer(new TCompactProtocol.Factory()); - byte[] serializedFn = serializer.serialize(fn.toThrift()); - String base64Fn = Base64.encodeBase64String(serializedFn); - String fnKey = FUNCTION_INDEX_PREFIX + fn.signatureString(); - if (base64Fn.length() > HIVE_METASTORE_DB_PARAM_LIMIT_BYTES) { - throw new ImpalaRuntimeException( - "Serialized function size exceeded HMS 4K byte limit"); - } - putToHmsParameters(fnKey, base64Fn); - } catch (ImpalaException | TException e) { - LOG.error("Error adding function " + fn.getName() + " to DB params", e); - return false; - } - return true; - } - - public boolean addFunction(Function fn) { - // We use the db parameters map to persist native and IR functions. - boolean addToDbParams = - (fn.getBinaryType() == TFunctionBinaryType.NATIVE || - fn.getBinaryType() == TFunctionBinaryType.IR); - return addFunction(fn, addToDbParams); - } - - /** - * Registers the function fn to this database. If addToDbParams is true, - * fn is added to the metastore DB params. Returns false if the function - * fn already exists or when a failure is encountered while adding it to - * the metastore DB params and true otherwise. - */ - public boolean addFunction(Function fn, boolean addToDbParams) { - Preconditions.checkState(fn.dbName().equals(getName())); - synchronized (functions_) { - if (getFunction(fn, Function.CompareMode.IS_INDISTINGUISHABLE) != null) { - return false; - } - List<Function> fns = functions_.get(fn.functionName()); - if (fns == null) { - fns = Lists.newArrayList(); - functions_.put(fn.functionName(), fns); - } - if (addToDbParams && !addFunctionToDbParams(fn)) return false; - fns.add(fn); - Collections.sort(fns, FUNCTION_RESOLUTION_ORDER); - return true; - } - } - - /** - * See comment in Catalog. - */ - public Function removeFunction(Function desc) { - synchronized (functions_) { - Function fn = getFunction(desc, Function.CompareMode.IS_INDISTINGUISHABLE); - if (fn == null) return null; - List<Function> fns = functions_.get(desc.functionName()); - Preconditions.checkNotNull(fns); - fns.remove(fn); - if (fns.isEmpty()) functions_.remove(desc.functionName()); - if (fn.getBinaryType() == TFunctionBinaryType.JAVA) return fn; - // Remove the function from the metastore database parameters - String fnKey = FUNCTION_INDEX_PREFIX + fn.signatureString(); - boolean removeFn = removeFromHmsParameters(fnKey); - Preconditions.checkState(removeFn); - return fn; - } - } - - /** - * Removes a Function with the matching signature string. Returns the removed Function - * if a Function was removed as a result of this call, null otherwise. - * TODO: Move away from using signature strings and instead use Function IDs. - */ - public Function removeFunction(String signatureStr) { - synchronized (functions_) { - Function targetFn = getFunction(signatureStr); - if (targetFn != null) return removeFunction(targetFn); - } - return null; - } - - /** - * Add a builtin with the specified name and signatures to this db. - * This defaults to not using a Prepare/Close function. - */ - public void addScalarBuiltin(String fnName, String symbol, boolean userVisible, - boolean varArgs, Type retType, Type ... 
args) { - addScalarBuiltin(fnName, symbol, userVisible, null, null, varArgs, retType, args); - } - - /** - * Add a builtin with the specified name and signatures to this db. - */ - public void addScalarBuiltin(String fnName, String symbol, boolean userVisible, - String prepareFnSymbol, String closeFnSymbol, boolean varArgs, Type retType, - Type ... args) { - Preconditions.checkState(isSystemDb()); - addBuiltin(ScalarFunction.createBuiltin( - fnName, Lists.newArrayList(args), varArgs, retType, - symbol, prepareFnSymbol, closeFnSymbol, userVisible)); - } - - /** - * Adds a builtin to this database. The function must not already exist. - */ - public void addBuiltin(Function fn) { - Preconditions.checkState(isSystemDb()); - Preconditions.checkState(fn != null); - Preconditions.checkState(getFunction(fn, Function.CompareMode.IS_IDENTICAL) == null); - addFunction(fn, false); - } - - /** - * Returns a map of functionNames to list of (overloaded) functions with that name. - * This is not thread safe so a higher level lock must be taken while iterating - * over the returned functions. - */ - protected HashMap<String, List<Function>> getAllFunctions() { - return functions_; - } - - /** - * Returns a list of transient functions in this Db. - */ - protected List<Function> getTransientFunctions() { - List<Function> result = Lists.newArrayList(); - synchronized (functions_) { - for (String fnKey: functions_.keySet()) { - for (Function fn: functions_.get(fnKey)) { - if (fn.userVisible() && !fn.isPersistent()) { - result.add(fn); - } - } - } - } - return result; - } - - /** - * Returns all functions that match the pattern of 'matcher'. - */ - public List<Function> getFunctions(TFunctionCategory category, - PatternMatcher matcher) { - Preconditions.checkNotNull(matcher); - List<Function> result = Lists.newArrayList(); - synchronized (functions_) { - for (Map.Entry<String, List<Function>> fns: functions_.entrySet()) { - if (!matcher.matches(fns.getKey())) continue; - for (Function fn: fns.getValue()) { - if ((category == null || Function.categoryMatch(fn, category)) - && fn.userVisible()) { - result.add(fn); - } - } - } - } - return result; - } - - /** - * Returns all functions with the given name - */ - public List<Function> getFunctions(String name) { - List<Function> result = Lists.newArrayList(); - Preconditions.checkNotNull(name); - synchronized (functions_) { - if (!functions_.containsKey(name)) return result; - for (Function fn: functions_.get(name)) { - if (fn.userVisible()) result.add(fn); - } - } - return result; - } - - /** - * Returns all functions with the given name and category. 
- */ - public List<Function> getFunctions(TFunctionCategory category, String name) { - List<Function> result = Lists.newArrayList(); - Preconditions.checkNotNull(category); - Preconditions.checkNotNull(name); - synchronized (functions_) { - if (!functions_.containsKey(name)) return result; - for (Function fn: functions_.get(name)) { - if (fn.userVisible() && Function.categoryMatch(fn, category)) { - result.add(fn); - } - } - } - return result; - } - - @Override - public long getCatalogVersion() { return catalogVersion_; } - @Override - public void setCatalogVersion(long newVersion) { catalogVersion_ = newVersion; } - public Catalog getParentCatalog() { return parentCatalog_; } - - @Override - public boolean isLoaded() { return true; } -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/Function.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/Function.java b/fe/src/main/java/com/cloudera/impala/catalog/Function.java deleted file mode 100644 index 406e958..0000000 --- a/fe/src/main/java/com/cloudera/impala/catalog/Function.java +++ /dev/null @@ -1,488 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.catalog; - -import java.util.List; - -import com.cloudera.impala.analysis.FunctionName; -import com.cloudera.impala.analysis.HdfsUri; -import com.cloudera.impala.common.AnalysisException; -import com.cloudera.impala.common.InternalException; -import com.cloudera.impala.service.FeSupport; -import com.cloudera.impala.thrift.TAggregateFunction; -import com.cloudera.impala.thrift.TCatalogObjectType; -import com.cloudera.impala.thrift.TColumnType; -import com.cloudera.impala.thrift.TFunction; -import com.cloudera.impala.thrift.TFunctionBinaryType; -import com.cloudera.impala.thrift.TFunctionCategory; -import com.cloudera.impala.thrift.TScalarFunction; -import com.cloudera.impala.thrift.TSymbolLookupParams; -import com.cloudera.impala.thrift.TSymbolLookupResult; -import com.cloudera.impala.thrift.TSymbolType; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; - - -/** - * Base class for all functions. - * Each function can be of the following 4 types. - * - Native/IR stored in db params (persisted, visible to Impala) - * - Hive UDFs stored in the HMS (visible to Hive + Impala) - * - Java UDFs which are not persisted (visible to Impala but not Hive) - * - Builtin functions, which are recreated after every restart of the - * catalog. (persisted, visible to Impala) - */ -public class Function implements CatalogObject { - // Enum for how to compare function signatures. 
- // For decimal types, the type in the function can be a wildcard, i.e. decimal(*,*). - // The wildcard can *only* exist as function type, the caller will always be a - // fully specified decimal. - // For the purposes of function type resolution, decimal(*,*) will match exactly - // with any fully specified decimal (i.e. fn(decimal(*,*)) matches identically for - // the call to fn(decimal(1,0)). - public enum CompareMode { - // Two signatures are identical if the number of arguments and their types match - // exactly and either both signatures are varargs or neither. - IS_IDENTICAL, - - // Two signatures are indistinguishable if there is no way to tell them apart - // when matching a particular instantiation. That is, their fixed arguments - // match exactly and the remaining varargs have the same type. - // e.g. fn(int, int, int) and fn(int...) - // Argument types that are NULL are ignored when doing this comparison. - // e.g. fn(NULL, int) is indistinguishable from fn(int, int) - IS_INDISTINGUISHABLE, - - // X is a supertype of Y if Y.arg[i] can be strictly implicitly cast to X.arg[i]. If - /// X has vargs, the remaining arguments of Y must be strictly implicitly castable - // to the var arg type. The key property this provides is that X can be used in place - // of Y. e.g. fn(int, double, string...) is a supertype of fn(tinyint, float, string, - // string) - IS_SUPERTYPE_OF, - - // Nonstrict supertypes broaden the definition of supertype to accept implicit casts - // of arguments that may result in loss of precision - e.g. decimal to float. - IS_NONSTRICT_SUPERTYPE_OF, - } - - // User specified function name e.g. "Add" - private FunctionName name_; - - private final Type retType_; - // Array of parameter types. empty array if this function does not have parameters. - private Type[] argTypes_; - - // If true, this function has variable arguments. - // TODO: we don't currently support varargs with no fixed types. i.e. fn(...) - private boolean hasVarArgs_; - - // If true (default), this function is called directly by the user. For operators, - // this is false. If false, it also means the function is not visible from - // 'show functions'. - private boolean userVisible_; - - // Absolute path in HDFS for the binary that contains this function. - // e.g. /udfs/udfs.jar - private HdfsUri location_; - private TFunctionBinaryType binaryType_; - - // Set to true for functions that survive service restarts, including all builtins, - // native and IR functions, but only Java functions created without a signature. - private boolean isPersistent_; - private long catalogVersion_ = Catalog.INITIAL_CATALOG_VERSION; - - public Function(FunctionName name, Type[] argTypes, - Type retType, boolean varArgs) { - this.name_ = name; - this.hasVarArgs_ = varArgs; - if (argTypes == null) { - argTypes_ = new Type[0]; - } else { - this.argTypes_ = argTypes; - } - if (retType == null) { - this.retType_ = ScalarType.INVALID; - } else { - this.retType_ = retType; - } - this.userVisible_ = true; - } - - public Function(FunctionName name, List<Type> args, - Type retType, boolean varArgs) { - this(name, (Type[])null, retType, varArgs); - if (args != null && args.size() > 0) { - argTypes_ = args.toArray(new Type[args.size()]); - } else { - argTypes_ = new Type[0]; - } - } - - /** - * Static helper method to create a function with a given TFunctionBinaryType. 
- */ - public static Function createFunction(String db, String fnName, List<Type> args, - Type retType, boolean varArgs, TFunctionBinaryType fnType) { - Function fn = - new Function(new FunctionName(db, fnName), args, retType, varArgs); - fn.setBinaryType(fnType); - return fn; - } - - public FunctionName getFunctionName() { return name_; } - public String functionName() { return name_.getFunction(); } - public String dbName() { return name_.getDb(); } - public Type getReturnType() { return retType_; } - public Type[] getArgs() { return argTypes_; } - // Returns the number of arguments to this function. - public int getNumArgs() { return argTypes_.length; } - public HdfsUri getLocation() { return location_; } - public TFunctionBinaryType getBinaryType() { return binaryType_; } - public boolean hasVarArgs() { return hasVarArgs_; } - public boolean isPersistent() { return isPersistent_; } - public boolean userVisible() { return userVisible_; } - public Type getVarArgsType() { - if (!hasVarArgs_) return Type.INVALID; - Preconditions.checkState(argTypes_.length > 0); - return argTypes_[argTypes_.length - 1]; - } - - public void setName(FunctionName name) { name_ = name; } - public void setLocation(HdfsUri loc) { location_ = loc; } - public void setBinaryType(TFunctionBinaryType type) { binaryType_ = type; } - public void setHasVarArgs(boolean v) { hasVarArgs_ = v; } - public void setIsPersistent(boolean v) { isPersistent_ = v; } - public void setUserVisible(boolean b) { userVisible_ = b; } - - // Returns a string with the signature in human readable format: - // FnName(argtype1, argtyp2). e.g. Add(int, int) - public String signatureString() { - StringBuilder sb = new StringBuilder(); - sb.append(name_.getFunction()) - .append("(") - .append(Joiner.on(", ").join(argTypes_)); - if (hasVarArgs_) sb.append("..."); - sb.append(")"); - return sb.toString(); - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof Function)) return false; - return compare((Function)o, CompareMode.IS_IDENTICAL); - } - - // Compares this to 'other' for mode. - public boolean compare(Function other, CompareMode mode) { - switch (mode) { - case IS_IDENTICAL: return isIdentical(other); - case IS_INDISTINGUISHABLE: return isIndistinguishable(other); - case IS_SUPERTYPE_OF: return isSuperTypeOf(other, true); - case IS_NONSTRICT_SUPERTYPE_OF: return isSuperTypeOf(other, false); - default: - Preconditions.checkState(false); - return false; - } - } - /** - * Returns true if 'this' is a supertype of 'other'. Each argument in other must - * be implicitly castable to the matching argument in this. If strict is true, - * only consider conversions where there is no loss of precision. - */ - private boolean isSuperTypeOf(Function other, boolean strict) { - if (!other.name_.equals(name_)) return false; - if (!this.hasVarArgs_ && other.argTypes_.length != this.argTypes_.length) { - return false; - } - if (this.hasVarArgs_ && other.argTypes_.length < this.argTypes_.length) return false; - for (int i = 0; i < this.argTypes_.length; ++i) { - if (!Type.isImplicitlyCastable(other.argTypes_[i], this.argTypes_[i], strict)) { - return false; - } - } - // Check trailing varargs. 
- if (this.hasVarArgs_) { - for (int i = this.argTypes_.length; i < other.argTypes_.length; ++i) { - if (other.argTypes_[i].matchesType(this.getVarArgsType())) continue; - if (!Type.isImplicitlyCastable(other.argTypes_[i], this.getVarArgsType(), - strict)) { - return false; - } - } - } - return true; - } - - /** - * Converts any CHAR arguments to be STRING arguments - */ - public Function promoteCharsToStrings() { - Type[] promoted = argTypes_.clone(); - for (int i = 0; i < promoted.length; ++i) { - if (promoted[i].isScalarType(PrimitiveType.CHAR)) promoted[i] = ScalarType.STRING; - } - return new Function(name_, promoted, retType_, hasVarArgs_); - } - - /** - * Given a list of functions which are a super type of this function, select the best - * match. This is the one which requires the fewest type promotions. - */ - public Function selectClosestSuperType(List<Function> candidates) { - Preconditions.checkArgument(candidates.size() > 0); - if (candidates.size() == 1) return candidates.get(0); - - // Always promote CHAR to STRING before attempting any other promotions. - Function withStrs = promoteCharsToStrings(); - for (Function f: candidates) { - if (withStrs.isIndistinguishable(f)) return f; - } - // Otherwise, we use the previous rules of resolution which are to take the first - // one in the list. - return candidates.get(0); - } - - private boolean isIdentical(Function o) { - if (!o.name_.equals(name_)) return false; - if (o.argTypes_.length != this.argTypes_.length) return false; - if (o.hasVarArgs_ != this.hasVarArgs_) return false; - for (int i = 0; i < this.argTypes_.length; ++i) { - if (!o.argTypes_[i].matchesType(this.argTypes_[i])) return false; - } - return true; - } - - private boolean isIndistinguishable(Function o) { - if (!o.name_.equals(name_)) return false; - int minArgs = Math.min(o.argTypes_.length, this.argTypes_.length); - // The first fully specified args must be identical. 
- for (int i = 0; i < minArgs; ++i) { - if (o.argTypes_[i].isNull() || this.argTypes_[i].isNull()) continue; - if (!o.argTypes_[i].matchesType(this.argTypes_[i])) return false; - } - if (o.argTypes_.length == this.argTypes_.length) return true; - - if (o.hasVarArgs_ && this.hasVarArgs_) { - if (!o.getVarArgsType().matchesType(this.getVarArgsType())) return false; - if (this.getNumArgs() > o.getNumArgs()) { - for (int i = minArgs; i < this.getNumArgs(); ++i) { - if (this.argTypes_[i].isNull()) continue; - if (!this.argTypes_[i].matchesType(o.getVarArgsType())) return false; - } - } else { - for (int i = minArgs; i < o.getNumArgs(); ++i) { - if (o.argTypes_[i].isNull()) continue; - if (!o.argTypes_[i].matchesType(this.getVarArgsType())) return false; - } - } - return true; - } else if (o.hasVarArgs_) { - // o has var args so check the remaining arguments from this - if (o.getNumArgs() > minArgs) return false; - for (int i = minArgs; i < this.getNumArgs(); ++i) { - if (this.argTypes_[i].isNull()) continue; - if (!this.argTypes_[i].matchesType(o.getVarArgsType())) return false; - } - return true; - } else if (this.hasVarArgs_) { - // this has var args so check the remaining arguments from s - if (this.getNumArgs() > minArgs) return false; - for (int i = minArgs; i < o.getNumArgs(); ++i) { - if (o.argTypes_[i].isNull()) continue; - if (!o.argTypes_[i].matchesType(this.getVarArgsType())) return false; - } - return true; - } else { - // Neither has var args and the lengths don't match - return false; - } - } - - @Override - public TCatalogObjectType getCatalogObjectType() { return TCatalogObjectType.FUNCTION; } - - @Override - public long getCatalogVersion() { return catalogVersion_; } - - @Override - public void setCatalogVersion(long newVersion) { catalogVersion_ = newVersion; } - - @Override - public String getName() { return getFunctionName().toString(); } - - // Child classes must override this function. - public String toSql(boolean ifNotExists) { return ""; } - - public TFunction toThrift() { - TFunction fn = new TFunction(); - fn.setSignature(signatureString()); - fn.setName(name_.toThrift()); - fn.setBinary_type(binaryType_); - if (location_ != null) fn.setHdfs_location(location_.toString()); - fn.setArg_types(Type.toThrift(argTypes_)); - fn.setRet_type(getReturnType().toThrift()); - fn.setHas_var_args(hasVarArgs_); - fn.setIs_persistent(isPersistent_); - // TODO: Comment field is missing? 
- // fn.setComment(comment_) - return fn; - } - - public static Function fromThrift(TFunction fn) { - List<Type> argTypes = Lists.newArrayList(); - for (TColumnType t: fn.getArg_types()) { - argTypes.add(Type.fromThrift(t)); - } - - Function function = null; - if (fn.isSetScalar_fn()) { - TScalarFunction scalarFn = fn.getScalar_fn(); - function = new ScalarFunction(FunctionName.fromThrift(fn.getName()), argTypes, - Type.fromThrift(fn.getRet_type()), new HdfsUri(fn.getHdfs_location()), - scalarFn.getSymbol(), scalarFn.getPrepare_fn_symbol(), - scalarFn.getClose_fn_symbol()); - } else if (fn.isSetAggregate_fn()) { - TAggregateFunction aggFn = fn.getAggregate_fn(); - function = new AggregateFunction(FunctionName.fromThrift(fn.getName()), argTypes, - Type.fromThrift(fn.getRet_type()), - Type.fromThrift(aggFn.getIntermediate_type()), - new HdfsUri(fn.getHdfs_location()), aggFn.getUpdate_fn_symbol(), - aggFn.getInit_fn_symbol(), aggFn.getSerialize_fn_symbol(), - aggFn.getMerge_fn_symbol(), aggFn.getGet_value_fn_symbol(), - null, aggFn.getFinalize_fn_symbol()); - } else { - // In the case where we are trying to look up the object, we only have the - // signature. - function = new Function(FunctionName.fromThrift(fn.getName()), - argTypes, Type.fromThrift(fn.getRet_type()), fn.isHas_var_args()); - } - function.setBinaryType(fn.getBinary_type()); - function.setHasVarArgs(fn.isHas_var_args()); - if (fn.isSetIs_persistent()) { - function.setIsPersistent(fn.isIs_persistent()); - } else { - function.setIsPersistent(false); - } - return function; - } - - @Override - public boolean isLoaded() { return true; } - - // Returns the resolved symbol in the binary. The BE will do a lookup of 'symbol' - // in the binary and try to resolve unmangled names. - // If this function is expecting a return argument, retArgType is that type. It should - // be null if this function isn't expecting a return argument. - public String lookupSymbol(String symbol, TSymbolType symbolType, Type retArgType, - boolean hasVarArgs, Type... argTypes) throws AnalysisException { - if (symbol.length() == 0) { - if (binaryType_ == TFunctionBinaryType.BUILTIN) { - // We allow empty builtin symbols in order to stage work in the FE before its - // implemented in the BE - return symbol; - } - throw new AnalysisException("Could not find symbol ''"); - } - - TSymbolLookupParams lookup = new TSymbolLookupParams(); - // Builtin functions do not have an external library, they are loaded directly from - // the running process - lookup.location = binaryType_ != TFunctionBinaryType.BUILTIN ? - location_.toString() : ""; - lookup.symbol = symbol; - lookup.symbol_type = symbolType; - lookup.fn_binary_type = binaryType_; - lookup.arg_types = Type.toThrift(argTypes); - lookup.has_var_args = hasVarArgs; - if (retArgType != null) lookup.setRet_arg_type(retArgType.toThrift()); - - try { - TSymbolLookupResult result = FeSupport.LookupSymbol(lookup); - switch (result.result_code) { - case SYMBOL_FOUND: - return result.symbol; - case BINARY_NOT_FOUND: - Preconditions.checkState(binaryType_ != TFunctionBinaryType.BUILTIN); - throw new AnalysisException( - "Could not load binary: " + location_.getLocation() + "\n" + - result.error_msg); - case SYMBOL_NOT_FOUND: - throw new AnalysisException(result.error_msg); - default: - // Should never get here. - throw new AnalysisException("Internal Error"); - } - } catch (InternalException e) { - // Should never get here. 
- e.printStackTrace(); - throw new AnalysisException("Could not find symbol: " + symbol, e); - } - } - - public String lookupSymbol(String symbol, TSymbolType symbolType) - throws AnalysisException { - Preconditions.checkState( - symbolType == TSymbolType.UDF_PREPARE || symbolType == TSymbolType.UDF_CLOSE); - return lookupSymbol(symbol, symbolType, null, false); - } - - public static String getUdfType(Type t) { - switch (t.getPrimitiveType()) { - case BOOLEAN: - return "BooleanVal"; - case TINYINT: - return "TinyIntVal"; - case SMALLINT: - return "SmallIntVal"; - case INT: - return "IntVal"; - case BIGINT: - return "BigIntVal"; - case FLOAT: - return "FloatVal"; - case DOUBLE: - return "DoubleVal"; - case STRING: - case VARCHAR: - case CHAR: - return "StringVal"; - case TIMESTAMP: - return "TimestampVal"; - case DECIMAL: - return "DecimalVal"; - default: - Preconditions.checkState(false, t.toString()); - return ""; - } - } - - /** - * Returns true if the given function matches the specified category. - */ - public static boolean categoryMatch(Function fn, TFunctionCategory category) { - Preconditions.checkNotNull(category); - return (category == TFunctionCategory.SCALAR && fn instanceof ScalarFunction) - || (category == TFunctionCategory.AGGREGATE - && fn instanceof AggregateFunction - && ((AggregateFunction)fn).isAggregateFn()) - || (category == TFunctionCategory.ANALYTIC - && fn instanceof AggregateFunction - && ((AggregateFunction)fn).isAnalyticFn()); - } -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/HBaseColumn.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HBaseColumn.java b/fe/src/main/java/com/cloudera/impala/catalog/HBaseColumn.java deleted file mode 100644 index 37fa853..0000000 --- a/fe/src/main/java/com/cloudera/impala/catalog/HBaseColumn.java +++ /dev/null @@ -1,67 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.catalog; - -import com.cloudera.impala.thrift.TColumn; - -// Describes an HBase column mapped to a Hive column (as described in the metastore). -// this.name describes the column name in Hive. -// This class adds the HBase columnFamily and columnQualifier, -// so we can read the column from HBase directly. 
-public class HBaseColumn extends Column implements Comparable<HBaseColumn> { - private final String columnFamily_; - private final String columnQualifier_; - private final boolean binaryEncoded_; - - public HBaseColumn(String name, String columnFamily, String columnQualifier, - boolean binaryEncoded, Type type, String comment, int position) { - super(name, type, comment, position); - columnFamily_ = columnFamily; - columnQualifier_ = columnQualifier; - binaryEncoded_ = binaryEncoded; - } - - public String getColumnFamily() { return columnFamily_; } - public String getColumnQualifier() { return columnQualifier_; } - public boolean isBinaryEncoded() { return binaryEncoded_; } - - @Override - // We order the HBase columns in the matadata based on columnFamily,columnQualifier, - // to more easily map slots from HBase's Result.raw() to target slots in the backend. - public int compareTo(HBaseColumn o) { - int familyCmp = columnFamily_.compareTo(o.columnFamily_); - if (familyCmp != 0) { - return familyCmp; - } - int qualifierCmp = columnQualifier_.compareTo(o.columnQualifier_); - return qualifierCmp; - } - - @Override - public TColumn toThrift() { - TColumn colDesc = new TColumn(name_, type_.toThrift()); - if (comment_ != null) colDesc.setComment(comment_); - colDesc.setCol_stats(getStats().toThrift()); - colDesc.setPosition(position_); - colDesc.setIs_hbase_column(true); - colDesc.setColumn_family(columnFamily_); - colDesc.setColumn_qualifier(columnQualifier_); - colDesc.setIs_binary(binaryEncoded_); - return colDesc; - } -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java b/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java deleted file mode 100644 index d96314e..0000000 --- a/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java +++ /dev/null @@ -1,853 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package com.cloudera.impala.catalog; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.ClusterStatus; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.HColumnDescriptor; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HRegionLocation; -import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.ServerLoad; -import org.apache.hadoop.hbase.ServerName; -import org.apache.hadoop.hbase.RegionLoad; -import org.apache.hadoop.hbase.client.Admin; -import org.apache.hadoop.hbase.client.Connection; -import org.apache.hadoop.hbase.client.ConnectionFactory; -import org.apache.hadoop.hbase.client.RegionLocator; -import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.client.ResultScanner; -import org.apache.hadoop.hbase.client.Scan; -import org.apache.hadoop.hbase.io.compress.Compression; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hive.hbase.HBaseSerDe; -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.log4j.Logger; - -import com.cloudera.impala.common.Pair; -import com.cloudera.impala.thrift.TCatalogObjectType; -import com.cloudera.impala.thrift.TColumn; -import com.cloudera.impala.thrift.THBaseTable; -import com.cloudera.impala.thrift.TResultSet; -import com.cloudera.impala.thrift.TResultSetMetadata; -import com.cloudera.impala.thrift.TTable; -import com.cloudera.impala.thrift.TTableDescriptor; -import com.cloudera.impala.thrift.TTableType; -import com.cloudera.impala.util.StatsHelper; -import com.cloudera.impala.util.TResultRowBuilder; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; - -/** - * Impala representation of HBase table metadata, - * as loaded from Hive's metastore. - * This implies that we inherit the metastore's limitations related to HBase, - * for example the lack of support for composite HBase row keys. - * We sort the HBase columns (cols) by family/qualifier - * to simplify the retrieval logic in the backend, since - * HBase returns data ordered by family/qualifier. - * This implies that a "select *"-query on an HBase table - * will not have the columns ordered as they were declared in the DDL. - * They will be ordered by family/qualifier. - * - */ -public class HBaseTable extends Table { - // Maximum deviation from the average to stop querying more regions - // to estimate the row count - private static final double DELTA_FROM_AVERAGE = 0.15; - - private static final Logger LOG = Logger.getLogger(HBaseTable.class); - - // Copied from Hive's HBaseStorageHandler.java. 
- public static final String DEFAULT_PREFIX = "default."; - - // Number of rows fetched during the row count estimation per region - public static final int ROW_COUNT_ESTIMATE_BATCH_SIZE = 10; - - // Minimum number of regions that are checked to estimate the row count - private static final int MIN_NUM_REGIONS_TO_CHECK = 5; - - // Column referring to HBase row key. - // Hive (including metastore) currently doesn't support composite HBase keys. - protected HBaseColumn rowKey_; - - // Name of table in HBase. - // 'this.name' is the alias of the HBase table in Hive. - protected String hbaseTableName_; - - // Input format class for HBase tables read by Hive. - private static final String HBASE_INPUT_FORMAT = - "org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat"; - - // Serialization class for HBase tables set in the corresponding Metastore table. - private static final String HBASE_SERIALIZATION_LIB = - "org.apache.hadoop.hive.hbase.HBaseSerDe"; - - // Storage handler class for HBase tables read by Hive. - private static final String HBASE_STORAGE_HANDLER = - "org.apache.hadoop.hive.hbase.HBaseStorageHandler"; - - // Column family of HBase row key - private static final String ROW_KEY_COLUMN_FAMILY = ":key"; - - // Keep the conf around - private final static Configuration hbaseConf_ = HBaseConfiguration.create(); - - // Cached column families. Used primarily for speeding up row stats estimation - // (see CDH-19292). - private HColumnDescriptor[] columnFamilies_ = null; - - protected HBaseTable(TableId id, org.apache.hadoop.hive.metastore.api.Table msTbl, - Db db, String name, String owner) { - super(id, msTbl, db, name, owner); - } - - /** - * Connection instances are expensive to create. The HBase documentation recommends - * one and then sharing it among threads. All operations on a connection are - * thread-safe. - */ - private static class ConnectionHolder { - private static Connection connection_ = null; - - public static synchronized Connection getConnection(Configuration conf) - throws IOException { - if (connection_ == null || connection_.isClosed()) { - connection_ = ConnectionFactory.createConnection(conf); - } - return connection_; - } - } - - /** - * Table client objects are thread-unsafe and cheap to create. The HBase docs recommend - * creating a new one for each task and then closing when done. - */ - public org.apache.hadoop.hbase.client.Table getHBaseTable() throws IOException { - return ConnectionHolder.getConnection(hbaseConf_) - .getTable(TableName.valueOf(hbaseTableName_)); - } - - private void closeHBaseTable(org.apache.hadoop.hbase.client.Table table) { - try { - table.close(); - } catch (IOException e) { - LOG.error("Error closing HBase table: " + hbaseTableName_, e); - } - } - - /** - * Get the cluster status, making sure we close the admin client afterwards. - */ - public ClusterStatus getClusterStatus() throws IOException { - Admin admin = null; - ClusterStatus clusterStatus = null; - try { - Connection connection = ConnectionHolder.getConnection(hbaseConf_); - admin = connection.getAdmin(); - clusterStatus = admin.getClusterStatus(); - } finally { - if (admin != null) admin.close(); - } - return clusterStatus; - } - - /** - * Parse the column description string to the column families and column - * qualifies. This is a copy of HBaseSerDe.parseColumnMapping and - * parseColumnStorageTypes with parts we don't use removed. The hive functions - * are not public. 
- - * tableDefaultStorageIsBinary - true if table is default to binary encoding - * columnsMappingSpec - input string format describing the table - * fieldSchemas - input field schema from metastore table - * columnFamilies/columnQualifiers/columnBinaryEncodings - out parameters that will be - * filled with the column family, column qualifier and encoding for each column. - */ - private void parseColumnMapping(boolean tableDefaultStorageIsBinary, - String columnsMappingSpec, List<FieldSchema> fieldSchemas, - List<String> columnFamilies, List<String> columnQualifiers, - List<Boolean> colIsBinaryEncoded) throws SerDeException { - if (columnsMappingSpec == null) { - throw new SerDeException( - "Error: hbase.columns.mapping missing for this HBase table."); - } - - if (columnsMappingSpec.equals("") || - columnsMappingSpec.equals(HBaseSerDe.HBASE_KEY_COL)) { - throw new SerDeException("Error: hbase.columns.mapping specifies only " - + "the HBase table row key. A valid Hive-HBase table must specify at " - + "least one additional column."); - } - - int rowKeyIndex = -1; - String[] columnSpecs = columnsMappingSpec.split(","); - // If there was an implicit key column mapping, the number of columns (fieldSchemas) - // will be one more than the number of column mapping specs. - int fsStartIdxOffset = fieldSchemas.size() - columnSpecs.length; - if (fsStartIdxOffset != 0 && fsStartIdxOffset != 1) { - // This should never happen - Hive blocks creating a mismatched table and both Hive - // and Impala currently block all column-level DDL on HBase tables. - throw new SerDeException(String.format("Number of entries in " + - "'hbase.columns.mapping' does not match the number of columns in the " + - "table: %d != %d (counting the key if implicit)", - columnSpecs.length, fieldSchemas.size())); - } - - for (int i = 0; i < columnSpecs.length; ++i) { - String mappingSpec = columnSpecs[i]; - String[] mapInfo = mappingSpec.split("#"); - // Trim column info so that serdeproperties with new lines still parse correctly. - String colInfo = mapInfo[0].trim(); - - int idxFirst = colInfo.indexOf(":"); - int idxLast = colInfo.lastIndexOf(":"); - - if (idxFirst < 0 || !(idxFirst == idxLast)) { - throw new SerDeException("Error: the HBase columns mapping contains a " - + "badly formed column family, column qualifier specification."); - } - - if (colInfo.equals(HBaseSerDe.HBASE_KEY_COL)) { - Preconditions.checkState(fsStartIdxOffset == 0); - rowKeyIndex = i; - columnFamilies.add(colInfo); - columnQualifiers.add(null); - } else { - String[] parts = colInfo.split(":"); - Preconditions.checkState(parts.length > 0 && parts.length <= 2); - columnFamilies.add(parts[0]); - if (parts.length == 2) { - columnQualifiers.add(parts[1]); - } else { - columnQualifiers.add(null); - } - } - - // Set column binary encoding - FieldSchema fieldSchema = fieldSchemas.get(i + fsStartIdxOffset); - boolean supportsBinaryEncoding = supportsBinaryEncoding(fieldSchema); - if (mapInfo.length == 1) { - // There is no column level storage specification. Use the table storage spec. 
- colIsBinaryEncoded.add( - new Boolean(tableDefaultStorageIsBinary && supportsBinaryEncoding)); - } else if (mapInfo.length == 2) { - // There is a storage specification for the column - String storageOption = mapInfo[1]; - - if (!(storageOption.equals("-") || "string".startsWith(storageOption) || "binary" - .startsWith(storageOption))) { - throw new SerDeException("Error: A column storage specification is one of" - + " the following: '-', a prefix of 'string', or a prefix of 'binary'. " - + storageOption + " is not a valid storage option specification for " - + fieldSchema.getName()); - } - - boolean isBinaryEncoded = false; - if ("-".equals(storageOption)) { - isBinaryEncoded = tableDefaultStorageIsBinary; - } else if ("binary".startsWith(storageOption)) { - isBinaryEncoded = true; - } - if (isBinaryEncoded && !supportsBinaryEncoding) { - // Use string encoding and log a warning if the column spec is binary but the - // column type does not support it. - // TODO: Hive/HBase does not raise an exception, but should we? - LOG.warn("Column storage specification for column " + fieldSchema.getName() - + " is binary" + " but the column type " + fieldSchema.getType() + - " does not support binary encoding. Fallback to string format."); - isBinaryEncoded = false; - } - colIsBinaryEncoded.add(isBinaryEncoded); - } else { - // error in storage specification - throw new SerDeException("Error: " + HBaseSerDe.HBASE_COLUMNS_MAPPING - + " storage specification " + mappingSpec + " is not valid for column: " - + fieldSchema.getName()); - } - } - - if (rowKeyIndex == -1) { - columnFamilies.add(0, HBaseSerDe.HBASE_KEY_COL); - columnQualifiers.add(0, null); - colIsBinaryEncoded.add(0, - supportsBinaryEncoding(fieldSchemas.get(0)) && tableDefaultStorageIsBinary); - } - } - - private boolean supportsBinaryEncoding(FieldSchema fs) { - try { - Type colType = parseColumnType(fs); - // Only boolean, integer and floating point types can use binary storage. - return colType.isBoolean() || colType.isIntegerType() - || colType.isFloatingPointType(); - } catch (TableLoadingException e) { - return false; - } - } - - @Override - /** - * For hbase tables, we can support tables with columns we don't understand at - * all (e.g. map) as long as the user does not select those. This is in contrast - * to hdfs tables since we typically need to understand all columns to make sense - * of the file at all. - */ - public void load(boolean reuseMetadata, IMetaStoreClient client, - org.apache.hadoop.hive.metastore.api.Table msTbl) throws TableLoadingException { - Preconditions.checkNotNull(getMetaStoreTable()); - try { - msTable_ = msTbl; - hbaseTableName_ = getHBaseTableName(getMetaStoreTable()); - // Warm up the connection and verify the table exists. 
- getHBaseTable().close(); - columnFamilies_ = null; - Map<String, String> serdeParams = - getMetaStoreTable().getSd().getSerdeInfo().getParameters(); - String hbaseColumnsMapping = serdeParams.get(HBaseSerDe.HBASE_COLUMNS_MAPPING); - if (hbaseColumnsMapping == null) { - throw new MetaException("No hbase.columns.mapping defined in Serde."); - } - - String hbaseTableDefaultStorageType = getMetaStoreTable().getParameters().get( - HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE); - boolean tableDefaultStorageIsBinary = false; - if (hbaseTableDefaultStorageType != null && - !hbaseTableDefaultStorageType.isEmpty()) { - if (hbaseTableDefaultStorageType.equalsIgnoreCase("binary")) { - tableDefaultStorageIsBinary = true; - } else if (!hbaseTableDefaultStorageType.equalsIgnoreCase("string")) { - throw new SerDeException("Error: " + - HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE + - " parameter must be specified as" + - " 'string' or 'binary'; '" + hbaseTableDefaultStorageType + - "' is not a valid specification for this table/serde property."); - } - } - - // Parse HBase column-mapping string. - List<FieldSchema> fieldSchemas = getMetaStoreTable().getSd().getCols(); - List<String> hbaseColumnFamilies = new ArrayList<String>(); - List<String> hbaseColumnQualifiers = new ArrayList<String>(); - List<Boolean> hbaseColumnBinaryEncodings = new ArrayList<Boolean>(); - parseColumnMapping(tableDefaultStorageIsBinary, hbaseColumnsMapping, fieldSchemas, - hbaseColumnFamilies, hbaseColumnQualifiers, hbaseColumnBinaryEncodings); - Preconditions.checkState( - hbaseColumnFamilies.size() == hbaseColumnQualifiers.size()); - Preconditions.checkState(fieldSchemas.size() == hbaseColumnFamilies.size()); - - // Populate tmp cols in the order they appear in the Hive metastore. - // We will reorder the cols below. - List<HBaseColumn> tmpCols = Lists.newArrayList(); - // Store the key column separately. - // TODO: Change this to an ArrayList once we support composite row keys. - HBaseColumn keyCol = null; - for (int i = 0; i < fieldSchemas.size(); ++i) { - FieldSchema s = fieldSchemas.get(i); - Type t = Type.INVALID; - try { - t = parseColumnType(s); - } catch (TableLoadingException e) { - // Ignore hbase types we don't support yet. We can load the metadata - // but won't be able to select from it. - } - HBaseColumn col = new HBaseColumn(s.getName(), hbaseColumnFamilies.get(i), - hbaseColumnQualifiers.get(i), hbaseColumnBinaryEncodings.get(i), - t, s.getComment(), -1); - if (col.getColumnFamily().equals(ROW_KEY_COLUMN_FAMILY)) { - // Store the row key column separately from the rest - keyCol = col; - } else { - tmpCols.add(col); - } - } - Preconditions.checkState(keyCol != null); - - // The backend assumes that the row key column is always first and - // that the remaining HBase columns are ordered by columnFamily,columnQualifier, - // so the final position depends on the other mapped HBase columns. - // Sort columns and update positions. - Collections.sort(tmpCols); - clearColumns(); - - keyCol.setPosition(0); - addColumn(keyCol); - // Update the positions of the remaining columns - for (int i = 0; i < tmpCols.size(); ++i) { - HBaseColumn col = tmpCols.get(i); - col.setPosition(i + 1); - addColumn(col); - } - - // Set table stats. 
- numRows_ = getRowCount(super.getMetaStoreTable().getParameters()); - - // since we don't support composite hbase rowkeys yet, all hbase tables have a - // single clustering col - numClusteringCols_ = 1; - loadAllColumnStats(client); - } catch (Exception e) { - throw new TableLoadingException("Failed to load metadata for HBase table: " + - name_, e); - } - } - - @Override - protected void loadFromThrift(TTable table) throws TableLoadingException { - super.loadFromThrift(table); - try { - hbaseTableName_ = getHBaseTableName(getMetaStoreTable()); - // Warm up the connection and verify the table exists. - getHBaseTable().close(); - columnFamilies_ = null; - } catch (Exception e) { - throw new TableLoadingException("Failed to load metadata for HBase table from " + - "thrift table: " + name_, e); - } - } - - /** - * This method is completely copied from Hive's HBaseStorageHandler.java. - */ - private String getHBaseTableName(org.apache.hadoop.hive.metastore.api.Table tbl) { - // Give preference to TBLPROPERTIES over SERDEPROPERTIES - // (really we should only use TBLPROPERTIES, so this is just - // for backwards compatibility with the original specs). - String tableName = tbl.getParameters().get(HBaseSerDe.HBASE_TABLE_NAME); - if (tableName == null) { - tableName = tbl.getSd().getSerdeInfo().getParameters().get( - HBaseSerDe.HBASE_TABLE_NAME); - } - if (tableName == null) { - tableName = tbl.getDbName() + "." + tbl.getTableName(); - if (tableName.startsWith(DEFAULT_PREFIX)) { - tableName = tableName.substring(DEFAULT_PREFIX.length()); - } - } - return tableName; - } - - /** - * Estimates the number of rows for a single region and returns a pair with - * the estimated row count and the estimated size in bytes per row. - */ - private Pair<Long, Long> getEstimatedRowStatsForRegion(HRegionLocation location, - boolean isCompressed, ClusterStatus clusterStatus) throws IOException { - HRegionInfo info = location.getRegionInfo(); - - Scan s = new Scan(info.getStartKey()); - // Get a small sample of rows - s.setBatch(ROW_COUNT_ESTIMATE_BATCH_SIZE); - // Try and get every version so the row's size can be used to estimate. - s.setMaxVersions(Short.MAX_VALUE); - // Don't cache the blocks as we don't think these are - // necessarily important blocks. - s.setCacheBlocks(false); - // Try and get deletes too so their size can be counted. - s.setRaw(false); - - org.apache.hadoop.hbase.client.Table table = getHBaseTable(); - ResultScanner rs = table.getScanner(s); - - long currentRowSize = 0; - long currentRowCount = 0; - - try { - // Get the the ROW_COUNT_ESTIMATE_BATCH_SIZE fetched rows - // for a representative sample - for (int i = 0; i < ROW_COUNT_ESTIMATE_BATCH_SIZE; ++i) { - Result r = rs.next(); - if (r == null) - break; - // Check for empty rows, see IMPALA-1451 - if (r.isEmpty()) - continue; - ++currentRowCount; - // To estimate the number of rows we simply use the amount of bytes - // returned from the underlying buffer. Since HBase internally works - // with these structures as well this gives us ok estimates. - Cell[] cells = r.rawCells(); - for (Cell c : cells) { - if (c instanceof KeyValue) { - currentRowSize += KeyValue.getKeyValueDataStructureSize(c.getRowLength(), - c.getFamilyLength(), c.getQualifierLength(), c.getValueLength(), - c.getTagsLength()); - } else { - throw new IllegalStateException("Celltype " + c.getClass().getName() + - " not supported."); - } - } - } - } finally { - rs.close(); - closeHBaseTable(table); - } - - // If there are no rows then no need to estimate. 
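The getHBaseTableName() logic earlier in this hunk resolves the backing HBase table name with a simple precedence: the property from TBLPROPERTIES wins, then the same property from SERDEPROPERTIES, then dbName.tableName with any leading default prefix removed. A condensed sketch using plain maps; the literal key "hbase.table.name" and the "default." prefix are assumptions about the values of HBaseSerDe.HBASE_TABLE_NAME and DEFAULT_PREFIX.

import java.util.Map;

class HBaseTableNameResolver {
  static final String HBASE_TABLE_NAME = "hbase.table.name";  // assumed constant value
  static final String DEFAULT_PREFIX = "default.";            // assumed constant value

  static String resolve(Map<String, String> tblProps, Map<String, String> serdeProps,
      String dbName, String tableName) {
    String name = tblProps.get(HBASE_TABLE_NAME);               // TBLPROPERTIES wins
    if (name == null) name = serdeProps.get(HBASE_TABLE_NAME);  // then SERDEPROPERTIES
    if (name == null) {
      name = dbName + "." + tableName;                          // finally db.table
      if (name.startsWith(DEFAULT_PREFIX)) {
        name = name.substring(DEFAULT_PREFIX.length());         // "default.t1" -> "t1"
      }
    }
    return name;
  }
}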
- if (currentRowCount == 0) return new Pair<Long, Long>(0L, 0L); - // Get the size. - long currentSize = getRegionSize(location, clusterStatus); - // estimate the number of rows. - double bytesPerRow = currentRowSize / (double) currentRowCount; - if (currentSize == 0) { - return new Pair<Long, Long>(currentRowCount, (long) bytesPerRow); - } - - // Compression factor two is only a best effort guess - long estimatedRowCount = - (long) ((isCompressed ? 2 : 1) * (currentSize / bytesPerRow)); - - return new Pair<Long, Long>(estimatedRowCount, (long) bytesPerRow); - } - - /** - * Get an estimate of the number of rows and bytes per row in regions between - * startRowKey and endRowKey. - * - * This number is calculated by incrementally checking as many region servers as - * necessary until we observe a relatively constant row size per region on average. - * Depending on the skew of data in the regions this can either mean that we need - * to check only a minimal number of regions or that we will scan all regions. - * - * The HBase region servers periodically update the master with their metrics, - * including storefile size. We get the size of the storefiles for all regions in - * the cluster with a single call to getClusterStatus from the master. - * - * The accuracy of this number is determined by the number of rows that are written - * and kept in the memstore and have not been flushed until now. A large number - * of key-value pairs in the memstore will lead to bad estimates as this number - * is not reflected in the storefile size that is used to estimate this number. - * - * Currently, the algorithm does not consider the case that the key range used as a - * parameter might be generally of different size than the rest of the region. - * - * The values computed here should be cached so that in high qps workloads - * the nn is not overwhelmed. Could be done in load(); Synchronized to make - * sure that only one thread at a time is using the htable. - * - * @param startRowKey - * First row key in the range - * @param endRowKey - * Last row key in the range - * @return The estimated number of rows in the regions between the row keys (first) and - * the estimated row size in bytes (second). - */ - public synchronized Pair<Long, Long> getEstimatedRowStats(byte[] startRowKey, - byte[] endRowKey) { - Preconditions.checkNotNull(startRowKey); - Preconditions.checkNotNull(endRowKey); - - boolean isCompressed = false; - long rowCount = 0; - long rowSize = 0; - - org.apache.hadoop.hbase.client.Table table = null; - try { - table = getHBaseTable(); - ClusterStatus clusterStatus = getClusterStatus(); - - // Check to see if things are compressed. - // If they are we'll estimate a compression factor. - if (columnFamilies_ == null) { - columnFamilies_ = table.getTableDescriptor().getColumnFamilies(); - } - Preconditions.checkNotNull(columnFamilies_); - for (HColumnDescriptor desc : columnFamilies_) { - isCompressed |= desc.getCompression() != Compression.Algorithm.NONE; - } - - // Fetch all regions for the key range - List<HRegionLocation> locations = getRegionsInRange(table, startRowKey, endRowKey); - Collections.shuffle(locations); - // The following variables track the number and size of 'rows' in - // HBase and allow incremental calculation of the average and standard - // deviation. - StatsHelper<Long> statsSize = new StatsHelper<Long>(); - long totalEstimatedRows = 0; - - // Collects stats samples from at least MIN_NUM_REGIONS_TO_CHECK - // and at most all regions until the delta is small enough. 
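The sampling loop that follows keeps a running mean and standard deviation of the per-region bytes-per-row samples in a StatsHelper, which is not included in this diff. Below is a hypothetical, minimal equivalent using Welford's online algorithm, only to make the stopping condition concrete; the class name RunningStats and its population-style stddev are assumptions.

/** Hypothetical minimal stand-in for StatsHelper: running mean and stddev of samples. */
class RunningStats {
  private long n_ = 0;
  private double mean_ = 0.0;
  private double m2_ = 0.0;  // sum of squared deviations (Welford's online algorithm)

  void addSample(double x) {
    ++n_;
    double delta = x - mean_;
    mean_ += delta / n_;
    m2_ += delta * (x - mean_);
  }

  long count() { return n_; }
  double mean() { return mean_; }
  double stddev() { return n_ < 2 ? 0.0 : Math.sqrt(m2_ / n_); }
}

// Shape of the stopping condition used below (MIN_NUM_REGIONS_TO_CHECK and
// DELTA_FROM_AVERAGE are constants of the deleted class):
//   while ((stats.count() < MIN_NUM_REGIONS_TO_CHECK
//           || stats.stddev() > stats.mean() * DELTA_FROM_AVERAGE)
//          && stats.count() < regions.size()) { /* sample one more region */ }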
- while ((statsSize.count() < MIN_NUM_REGIONS_TO_CHECK || - statsSize.stddev() > statsSize.mean() * DELTA_FROM_AVERAGE) && - statsSize.count() < locations.size()) { - HRegionLocation currentLocation = locations.get((int) statsSize.count()); - Pair<Long, Long> tmp = getEstimatedRowStatsForRegion(currentLocation, - isCompressed, clusterStatus); - totalEstimatedRows += tmp.first; - statsSize.addSample(tmp.second); - } - - // Sum up the total size for all regions in range. - long totalSize = 0; - for (final HRegionLocation location : locations) { - totalSize += getRegionSize(location, clusterStatus); - } - if (totalSize == 0) { - rowCount = totalEstimatedRows; - } else { - rowCount = (long) (totalSize / statsSize.mean()); - } - rowSize = (long) statsSize.mean(); - } catch (IOException ioe) { - // Print the stack trace, but we'll ignore it - // as this is just an estimate. - // TODO: Put this into the per query log. - LOG.error("Error computing HBase row count estimate", ioe); - return new Pair<Long, Long>(-1l, -1l); - } finally { - if (table != null) closeHBaseTable(table); - } - return new Pair<Long, Long>(rowCount, rowSize); - } - - /** - * Returns the size of the given region in bytes. Simply returns the storefile size - * for this region from the ClusterStatus. Returns 0 in case of an error. - */ - public long getRegionSize(HRegionLocation location, ClusterStatus clusterStatus) { - HRegionInfo info = location.getRegionInfo(); - ServerLoad serverLoad = clusterStatus.getLoad(location.getServerName()); - - // If the serverLoad is null, the master doesn't have information for this region's - // server. This shouldn't normally happen. - if (serverLoad == null) { - LOG.error("Unable to find load for server: " + location.getServerName() + - " for location " + info.getRegionNameAsString()); - return 0; - } - RegionLoad regionLoad = serverLoad.getRegionsLoad().get(info.getRegionName()); - - final long megaByte = 1024L * 1024L; - return regionLoad.getStorefileSizeMB() * megaByte; - } - - /** - * Hive returns the columns in order of their declaration for HBase tables. 
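The range-level extrapolation performed above is simple arithmetic: average the sampled bytes-per-row, then divide the summed storefile size of all regions in range by that average; when nothing has been flushed to storefiles yet, fall back to the row counts seen by the sample scans. A hedged restatement (estimateRangeRowCount is not a method of the deleted class):

class RowCountExtrapolation {
  /**
   * meanBytesPerRow: average row size from the sampled regions (assumed > 0 whenever
   *                  totalStorefileBytes > 0).
   * totalStorefileBytes: sum of getRegionSize(...) over all regions in the key range.
   * sampledRowEstimate: sum of the per-region row estimates gathered while sampling.
   */
  static long estimateRangeRowCount(double meanBytesPerRow, long totalStorefileBytes,
      long sampledRowEstimate) {
    if (totalStorefileBytes == 0) return sampledRowEstimate;  // nothing flushed yet
    return (long) (totalStorefileBytes / meanBytesPerRow);
  }
}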
- */ - @Override - public ArrayList<Column> getColumnsInHiveOrder() { - return getColumns(); - } - - @Override - public TTableDescriptor toThriftDescriptor(Set<Long> referencedPartitions) { - TTableDescriptor tableDescriptor = - new TTableDescriptor(id_.asInt(), TTableType.HBASE_TABLE, - getTColumnDescriptors(), numClusteringCols_, hbaseTableName_, db_.getName()); - tableDescriptor.setHbaseTable(getTHBaseTable()); - return tableDescriptor; - } - - public String getHBaseTableName() { - return hbaseTableName_; - } - - public static Configuration getHBaseConf() { - return hbaseConf_; - } - - public int getNumNodes() { - // TODO: implement - return 100; - } - - @Override - public TCatalogObjectType getCatalogObjectType() { - return TCatalogObjectType.TABLE; - } - - @Override - public TTable toThrift() { - TTable table = super.toThrift(); - table.setTable_type(TTableType.HBASE_TABLE); - table.setHbase_table(getTHBaseTable()); - return table; - } - - private THBaseTable getTHBaseTable() { - THBaseTable tHbaseTable = new THBaseTable(); - tHbaseTable.setTableName(hbaseTableName_); - for (Column c : getColumns()) { - HBaseColumn hbaseCol = (HBaseColumn) c; - tHbaseTable.addToFamilies(hbaseCol.getColumnFamily()); - if (hbaseCol.getColumnQualifier() != null) { - tHbaseTable.addToQualifiers(hbaseCol.getColumnQualifier()); - } else { - tHbaseTable.addToQualifiers(""); - } - tHbaseTable.addToBinary_encoded(hbaseCol.isBinaryEncoded()); - } - return tHbaseTable; - } - - /** - * This is copied from org.apache.hadoop.hbase.client.HTable. The only difference is - * that it does not use cache when calling getRegionLocation. - * TODO: Remove this function and use HTable.getRegionsInRange when the non-cache - * version has been ported to CDH (DISTRO-477). - * Get the corresponding regions for an arbitrary range of keys. - * <p> - * - * @param startRow - * Starting row in range, inclusive - * @param endRow - * Ending row in range, exclusive - * @return A list of HRegionLocations corresponding to the regions that - * contain the specified range - * @throws IOException - * if a remote or network exception occurs - */ - public static List<HRegionLocation> getRegionsInRange( - org.apache.hadoop.hbase.client.Table hbaseTbl, - final byte[] startKey, final byte[] endKey) throws IOException { - final boolean endKeyIsEndOfTable = Bytes.equals(endKey, HConstants.EMPTY_END_ROW); - if ((Bytes.compareTo(startKey, endKey) > 0) && !endKeyIsEndOfTable) { - throw new IllegalArgumentException("Invalid range: " + - Bytes.toStringBinary(startKey) + " > " + Bytes.toStringBinary(endKey)); - } - final List<HRegionLocation> regionList = new ArrayList<HRegionLocation>(); - byte[] currentKey = startKey; - Connection connection = ConnectionHolder.getConnection(hbaseConf_); - // Make sure only one thread is accessing the hbaseTbl. - synchronized (hbaseTbl) { - RegionLocator locator = connection.getRegionLocator(hbaseTbl.getName()); - do { - // always reload region location info. - HRegionLocation regionLocation = locator.getRegionLocation(currentKey, true); - regionList.add(regionLocation); - currentKey = regionLocation.getRegionInfo().getEndKey(); - } while (!Bytes.equals(currentKey, HConstants.EMPTY_END_ROW) && - (endKeyIsEndOfTable || Bytes.compareTo(currentKey, endKey) < 0)); - } - return regionList; - } - - /** - * Returns the storage handler class for HBase tables read by Hive. 
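getRegionsInRange() above walks region boundaries one uncached lookup at a time: locate the region containing the current key, record it, and continue from that region's end key until the end of the table or the requested end key is passed. The sketch below isolates that walk behind a hypothetical RegionBoundaryLookup interface so it needs no live HBase connection; only the loop structure mirrors the deleted method.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

class RegionRangeWalk {
  /** Hypothetical lookup: returns {startKey, endKey} of the region containing 'key'. */
  interface RegionBoundaryLookup {
    byte[][] locate(byte[] key);
  }

  static final byte[] EMPTY_END_ROW = new byte[0];  // mirrors HConstants.EMPTY_END_ROW

  static List<byte[][]> regionsInRange(RegionBoundaryLookup locator, byte[] startKey,
      byte[] endKey) {
    boolean endIsEndOfTable = Arrays.equals(endKey, EMPTY_END_ROW);
    if (!endIsEndOfTable && compareUnsigned(startKey, endKey) > 0) {
      throw new IllegalArgumentException("Invalid range: start key sorts after end key");
    }
    List<byte[][]> regions = new ArrayList<>();
    byte[] current = startKey;
    do {
      byte[][] region = locator.locate(current);  // always a fresh, uncached lookup
      regions.add(region);
      current = region[1];                        // continue from this region's end key
    } while (!Arrays.equals(current, EMPTY_END_ROW)
        && (endIsEndOfTable || compareUnsigned(current, endKey) < 0));
    return regions;
  }

  // Byte-wise unsigned comparison, matching the ordering Bytes.compareTo uses.
  static int compareUnsigned(byte[] a, byte[] b) {
    int n = Math.min(a.length, b.length);
    for (int i = 0; i < n; ++i) {
      int cmp = (a[i] & 0xff) - (b[i] & 0xff);
      if (cmp != 0) return cmp;
    }
    return a.length - b.length;
  }
}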
- */ - @Override - public String getStorageHandlerClassName() { - return HBASE_STORAGE_HANDLER; - } - - /** - * Returns statistics on this table as a tabular result set. Used for the - * SHOW TABLE STATS statement. The schema of the returned TResultSet is set - * inside this method. - */ - public TResultSet getTableStats() { - TResultSet result = new TResultSet(); - TResultSetMetadata resultSchema = new TResultSetMetadata(); - result.setSchema(resultSchema); - resultSchema.addToColumns( - new TColumn("Region Location", Type.STRING.toThrift())); - resultSchema.addToColumns(new TColumn("Start RowKey", - Type.STRING.toThrift())); - resultSchema.addToColumns(new TColumn("Est. #Rows", Type.BIGINT.toThrift())); - resultSchema.addToColumns(new TColumn("Size", Type.STRING.toThrift())); - - org.apache.hadoop.hbase.client.Table table; - try { - table = getHBaseTable(); - } catch (IOException e) { - LOG.error("Error getting HBase table " + hbaseTableName_, e); - throw new RuntimeException(e); - } - - // TODO: Consider fancier stats maintenance techniques for speeding up this process. - // Currently, we list all regions and perform a mini-scan of each of them to - // estimate the number of rows, the data size, etc., which is rather expensive. - try { - ClusterStatus clusterStatus = getClusterStatus(); - long totalNumRows = 0; - long totalSize = 0; - List<HRegionLocation> regions = HBaseTable.getRegionsInRange(table, - HConstants.EMPTY_END_ROW, HConstants.EMPTY_START_ROW); - for (HRegionLocation region : regions) { - TResultRowBuilder rowBuilder = new TResultRowBuilder(); - HRegionInfo regionInfo = region.getRegionInfo(); - Pair<Long, Long> estRowStats = - getEstimatedRowStatsForRegion(region, false, clusterStatus); - - long numRows = estRowStats.first.longValue(); - long regionSize = getRegionSize(region, clusterStatus); - totalNumRows += numRows; - totalSize += regionSize; - - // Add the region location, start rowkey, number of rows and raw size. - rowBuilder.add(String.valueOf(region.getHostname())) - .add(Bytes.toString(regionInfo.getStartKey())).add(numRows) - .addBytes(regionSize); - result.addToRows(rowBuilder.get()); - } - - // Total num rows and raw region size. - if (regions.size() > 1) { - TResultRowBuilder rowBuilder = new TResultRowBuilder(); - rowBuilder.add("Total").add("").add(totalNumRows).addBytes(totalSize); - result.addToRows(rowBuilder.get()); - } - } catch (IOException e) { - throw new RuntimeException(e); - } finally { - closeHBaseTable(table); - } - return result; - } - - /** - * Returns true if the given Metastore Table represents an HBase table. - * Versions of Hive/HBase are inconsistent which HBase related fields are set - * (e.g., HIVE-6548 changed the input format to null). - * For maximum compatibility consider all known fields that indicate an HBase table. 
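The SHOW TABLE STATS output assembled by getTableStats() above is one row per region (region location, start row key, estimated rows, size) plus a trailing 'Total' row when the table spans more than one region. A minimal, purely illustrative aggregation over a hypothetical RegionStats value class, using plain strings instead of TResultSet:

import java.util.ArrayList;
import java.util.List;

class TableStatsSummary {
  /** Hypothetical per-region sample: host, start row key, estimated rows, size in bytes. */
  static class RegionStats {
    final String host; final String startKey; final long estRows; final long sizeBytes;
    RegionStats(String host, String startKey, long estRows, long sizeBytes) {
      this.host = host; this.startKey = startKey;
      this.estRows = estRows; this.sizeBytes = sizeBytes;
    }
  }

  static List<String> render(List<RegionStats> regions) {
    List<String> rows = new ArrayList<>();
    long totalRows = 0;
    long totalBytes = 0;
    for (RegionStats r : regions) {
      rows.add(r.host + "\t" + r.startKey + "\t" + r.estRows + "\t" + r.sizeBytes);
      totalRows += r.estRows;
      totalBytes += r.sizeBytes;
    }
    if (regions.size() > 1) {
      rows.add("Total\t\t" + totalRows + "\t" + totalBytes);  // only for multi-region tables
    }
    return rows;
  }
}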
- */ - public static boolean isHBaseTable( - org.apache.hadoop.hive.metastore.api.Table msTbl) { - if (msTbl.getParameters() != null && - msTbl.getParameters().containsKey(HBASE_STORAGE_HANDLER)) { - return true; - } - StorageDescriptor sd = msTbl.getSd(); - if (sd == null) return false; - if (sd.getInputFormat() != null && sd.getInputFormat().equals(HBASE_INPUT_FORMAT)) { - return true; - } else if (sd.getSerdeInfo() != null && - sd.getSerdeInfo().getSerializationLib() != null && - sd.getSerdeInfo().getSerializationLib().equals(HBASE_SERIALIZATION_LIB)) { - return true; - } - return false; - } -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/HdfsCachePool.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HdfsCachePool.java b/fe/src/main/java/com/cloudera/impala/catalog/HdfsCachePool.java deleted file mode 100644 index b8ff102..0000000 --- a/fe/src/main/java/com/cloudera/impala/catalog/HdfsCachePool.java +++ /dev/null @@ -1,65 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.catalog; - -import org.apache.hadoop.hdfs.protocol.CachePoolInfo; - -import com.cloudera.impala.thrift.TCatalogObjectType; -import com.cloudera.impala.thrift.THdfsCachePool; -import com.google.common.base.Preconditions; - -/** - * Represents an HDFS cache pool (CachePoolInfo class). Currently, the only metadata we - * care about for cache pools is the cache pool name. In the future it may be desirable - * to track additional metadata such as the owner, size, and current usage of the pool. 
- */ -public class HdfsCachePool implements CatalogObject { - private long catalogVersion_; - private final THdfsCachePool cachePool_; - - public HdfsCachePool(CachePoolInfo cachePoolInfo) { - cachePool_ = new THdfsCachePool(cachePoolInfo.getPoolName()); - } - - public HdfsCachePool(THdfsCachePool cachePool) { - Preconditions.checkNotNull(cachePool); - cachePool_ = cachePool; - } - - @Override - public TCatalogObjectType getCatalogObjectType() { - return TCatalogObjectType.HDFS_CACHE_POOL; - } - - public THdfsCachePool toThrift() { - return cachePool_; - } - - public static HdfsCachePool fromThrift(THdfsCachePool cachePool) { - return new HdfsCachePool(cachePool); - } - - @Override - public String getName() { return cachePool_.getPool_name(); } - @Override - public long getCatalogVersion() { return catalogVersion_; } - @Override - public void setCatalogVersion(long newVersion) { catalogVersion_ = newVersion; } - @Override - public boolean isLoaded() { return true; } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/HdfsCompression.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HdfsCompression.java b/fe/src/main/java/com/cloudera/impala/catalog/HdfsCompression.java deleted file mode 100644 index 302ec99..0000000 --- a/fe/src/main/java/com/cloudera/impala/catalog/HdfsCompression.java +++ /dev/null @@ -1,85 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.catalog; - -import com.cloudera.impala.thrift.THdfsCompression; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableMap; - -/** - * Support for recognizing compression suffixes on data files. - * Compression of a file is recognized in mapreduce by looking for suffixes of - * supported codecs. - * For now Impala supports LZO, GZIP, SNAPPY, and BZIP2. LZO can use the specific HIVE - * input class. - */ -// TODO: Add LZ4? -public enum HdfsCompression { - NONE, - DEFLATE, - GZIP, - BZIP2, - SNAPPY, - LZO, - LZO_INDEX; //Lzo index file. - - /* Map from a suffix to a compression type */ - private static final ImmutableMap<String, HdfsCompression> SUFFIX_MAP = - ImmutableMap.<String, HdfsCompression>builder(). - put("deflate", DEFLATE). - put("gz", GZIP). - put("bz2", BZIP2). - put("snappy", SNAPPY). - put("lzo", LZO). - put("index", LZO_INDEX). - build(); - - /* Given a file name return its compression type, if any. 
*/ - public static HdfsCompression fromFileName(String fileName) { - int index = fileName.lastIndexOf("."); - if (index == -1) { - return NONE; - } - - String suffix = fileName.substring(index + 1); - HdfsCompression compression = SUFFIX_MAP.get(suffix.toLowerCase()); - return compression == null ? NONE : compression; - } - - public THdfsCompression toThrift() { - switch (this) { - case NONE: return THdfsCompression.NONE; - case DEFLATE: return THdfsCompression.DEFLATE; - case GZIP: return THdfsCompression.GZIP; - case BZIP2: return THdfsCompression.BZIP2; - case SNAPPY: return THdfsCompression.SNAPPY_BLOCKED; - case LZO: return THdfsCompression.LZO; - default: throw new IllegalStateException("Unexpected codec: " + this); - } - } - - /* Returns a compression type based on (Hive's) input format. Special case for LZO. */ - public static HdfsCompression fromHdfsInputFormatClass(String inputFormatClass) { - // TODO: Remove when we have the native LZO writer. - Preconditions.checkNotNull(inputFormatClass); - if (inputFormatClass.equals(HdfsFileFormat.LZO_TEXT.inputFormat())) { - return LZO; - } - return NONE; - } -}
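As a usage-level illustration of the suffix mapping above: the codec is inferred purely from the last dot-separated extension, the lookup is case-insensitive, and a missing or unrecognized extension means no compression. The expected results in the comments below are derived from fromFileName()'s logic; the example class itself is hypothetical and runs against the (now-deleted) enum.

import com.cloudera.impala.catalog.HdfsCompression;

public class HdfsCompressionExample {
  public static void main(String[] args) {
    System.out.println(HdfsCompression.fromFileName("part-00000.gz"));   // GZIP
    System.out.println(HdfsCompression.fromFileName("web_logs.LZO"));    // LZO (suffix is lower-cased)
    System.out.println(HdfsCompression.fromFileName("data.lzo.index"));  // LZO_INDEX
    System.out.println(HdfsCompression.fromFileName("README"));          // NONE: no '.' in the name
    System.out.println(HdfsCompression.fromFileName("archive.zip"));     // NONE: unknown suffix
  }
}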
