http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/DatabaseNotFoundException.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/DatabaseNotFoundException.java b/fe/src/main/java/com/cloudera/impala/catalog/DatabaseNotFoundException.java deleted file mode 100644 index 8affb11..0000000 --- a/fe/src/main/java/com/cloudera/impala/catalog/DatabaseNotFoundException.java +++ /dev/null @@ -1,29 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.catalog; - - -/** - * Thrown when a database cannot be found in the catalog. - */ -public class DatabaseNotFoundException extends CatalogException { - // Dummy serial ID to satisfy Eclipse - private static final long serialVersionUID = -2203080667446640542L; - - public DatabaseNotFoundException(String s) { super(s); } -} \ No newline at end of file
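For context on the file removed above: per its own Javadoc, DatabaseNotFoundException is the checked exception thrown when a database cannot be found in the catalog, and it extends CatalogException. Below is a minimal, self-contained sketch of the lookup-or-throw pattern such an exception supports. It is illustrative only: the CatalogSketch class, getDbOrThrow method, and the stub Catalog/Db types are stand-ins invented for this example and are not Impala's actual Catalog API.

```java
// Minimal sketch (not Impala source): shows the typical "look up a database,
// throw a descriptive exception if it does not exist" pattern that a
// DatabaseNotFoundException-style class enables. All names below are
// illustrative stand-ins.
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

class CatalogException extends Exception {
  CatalogException(String msg) { super(msg); }
}

class DatabaseNotFoundException extends CatalogException {
  DatabaseNotFoundException(String msg) { super(msg); }
}

class Db {
  private final String name;
  Db(String name) { this.name = name; }
  String getName() { return name; }
}

public class CatalogSketch {
  // Databases keyed by lower-cased name, mirroring case-insensitive db names.
  private final Map<String, Db> dbs = new ConcurrentHashMap<>();

  void addDb(Db db) { dbs.put(db.getName().toLowerCase(), db); }

  // Lookup that throws instead of returning null, so callers can surface a
  // clear "database does not exist" error to the user.
  Db getDbOrThrow(String dbName) throws DatabaseNotFoundException {
    Db db = dbs.get(dbName.toLowerCase());
    if (db == null) {
      throw new DatabaseNotFoundException("Database not found: " + dbName);
    }
    return db;
  }

  public static void main(String[] args) {
    CatalogSketch catalog = new CatalogSketch();
    catalog.addDb(new Db("functional"));
    try {
      catalog.getDbOrThrow("missing_db");
    } catch (DatabaseNotFoundException e) {
      System.out.println(e.getMessage()); // prints: Database not found: missing_db
    }
  }
}
```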
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/Db.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/Db.java b/fe/src/main/java/com/cloudera/impala/catalog/Db.java deleted file mode 100644 index a9150fe..0000000 --- a/fe/src/main/java/com/cloudera/impala/catalog/Db.java +++ /dev/null @@ -1,495 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.catalog; - -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.commons.codec.binary.Base64; -import org.apache.thrift.protocol.TCompactProtocol; -import org.apache.thrift.TException; -import org.apache.thrift.TSerializer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.cloudera.impala.catalog.Function; -import com.cloudera.impala.common.ImpalaException; -import com.cloudera.impala.common.ImpalaRuntimeException; -import com.cloudera.impala.common.JniUtil; -import com.cloudera.impala.thrift.TCatalogObjectType; -import com.cloudera.impala.thrift.TDatabase; -import com.cloudera.impala.thrift.TFunction; -import com.cloudera.impala.thrift.TFunctionBinaryType; -import com.cloudera.impala.thrift.TFunctionCategory; -import com.cloudera.impala.util.PatternMatcher; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - -/** - * Internal representation of db-related metadata. Owned by Catalog instance. - * Not thread safe. - * - * Tables are stored in a map from the table name to the table object. They may - * be loaded 'eagerly' at construction or 'lazily' on first reference. - * Tables are accessed via getTable which may trigger a metadata read in two cases: - * * if the table has never been loaded - * * if the table loading failed on the previous attempt - * - * Native user added functions are persisted to the parameters map of the hive metastore - * db object corresponding to this instance. This map's key is the function signature and - * value is the base64 representation of the thrift serialized function object. - * - */ -public class Db implements CatalogObject { - private static final Logger LOG = LoggerFactory.getLogger(Db.class); - private final Catalog parentCatalog_; - private final TDatabase thriftDb_; - private long catalogVersion_ = Catalog.INITIAL_CATALOG_VERSION; - - public static final String FUNCTION_INDEX_PREFIX = "impala_registered_function_"; - - // Hive metastore imposes a limit of 4000 bytes on the key and value strings - // in DB parameters map. 
We need ensure that this limit isn't crossed - // while serializing functions to the metastore. - private static final int HIVE_METASTORE_DB_PARAM_LIMIT_BYTES = 4000; - - // Table metadata cache. - private final CatalogObjectCache<Table> tableCache_; - - // All of the registered user functions. The key is the user facing name (e.g. "myUdf"), - // and the values are all the overloaded variants (e.g. myUdf(double), myUdf(string)) - // This includes both UDFs and UDAs. Updates are made thread safe by synchronizing - // on this map. When a new Db object is initialized, this list is updated with the - // UDF/UDAs already persisted, if any, in the metastore DB. Functions are sorted in a - // canonical order defined by FunctionResolutionOrder. - private final HashMap<String, List<Function>> functions_; - - // If true, this database is an Impala system database. - // (e.g. can't drop it, can't add tables to it, etc). - private boolean isSystemDb_ = false; - - public Db(String name, Catalog catalog, - org.apache.hadoop.hive.metastore.api.Database msDb) { - thriftDb_ = new TDatabase(name.toLowerCase()); - parentCatalog_ = catalog; - thriftDb_.setMetastore_db(msDb); - tableCache_ = new CatalogObjectCache<Table>(); - functions_ = new HashMap<String, List<Function>>(); - } - - public void setIsSystemDb(boolean b) { isSystemDb_ = b; } - - /** - * Creates a Db object with no tables based on the given TDatabase thrift struct. - */ - public static Db fromTDatabase(TDatabase db, Catalog parentCatalog) { - return new Db(db.getDb_name(), parentCatalog, db.getMetastore_db()); - } - - /** - * Updates the hms parameters map by adding the input <k,v> pair. - */ - private void putToHmsParameters(String k, String v) { - org.apache.hadoop.hive.metastore.api.Database msDb = thriftDb_.metastore_db; - Preconditions.checkNotNull(msDb); - Map<String, String> hmsParams = msDb.getParameters(); - if (hmsParams == null) hmsParams = Maps.newHashMap(); - hmsParams.put(k,v); - msDb.setParameters(hmsParams); - } - - /** - * Updates the hms parameters map by removing the <k,v> pair corresponding to - * input key <k>. Returns true if the parameters map contains a pair <k,v> - * corresponding to input k and it is removed, false otherwise. - */ - private boolean removeFromHmsParameters(String k) { - org.apache.hadoop.hive.metastore.api.Database msDb = thriftDb_.metastore_db; - Preconditions.checkNotNull(msDb); - if (msDb.getParameters() == null) return false; - return msDb.getParameters().remove(k) != null; - } - - public boolean isSystemDb() { return isSystemDb_; } - public TDatabase toThrift() { return thriftDb_; } - @Override - public String getName() { return thriftDb_.getDb_name(); } - @Override - public TCatalogObjectType getCatalogObjectType() { - return TCatalogObjectType.DATABASE; - } - - /** - * Adds a table to the table cache. - */ - public void addTable(Table table) { - tableCache_.add(table); - } - - /** - * Gets all table names in the table cache. - */ - public List<String> getAllTableNames() { - return Lists.newArrayList(tableCache_.keySet()); - } - - public boolean containsTable(String tableName) { - return tableCache_.contains(tableName.toLowerCase()); - } - - /** - * Returns the Table with the given name if present in the table cache or null if the - * table does not exist in the cache. - */ - public Table getTable(String tblName) { - return tableCache_.get(tblName); - } - - /** - * Removes the table name and any cached metadata from the Table cache. 
- */ - public Table removeTable(String tableName) { - return tableCache_.remove(tableName.toLowerCase()); - } - - /** - * Comparator that sorts function overloads. We want overloads to be always considered - * in a canonical order so that overload resolution in the case of multiple valid - * overloads does not depend on the order in which functions are added to the Db. The - * order is based on the PrimitiveType enum because this was the order used implicitly - * for builtin operators and functions in earlier versions of Impala. - */ - private static class FunctionResolutionOrder implements Comparator<Function> { - @Override - public int compare(Function f1, Function f2) { - int numSharedArgs = Math.min(f1.getNumArgs(), f2.getNumArgs()); - for (int i = 0; i < numSharedArgs; ++i) { - int cmp = typeCompare(f1.getArgs()[i], f2.getArgs()[i]); - if (cmp < 0) { - return -1; - } else if (cmp > 0) { - return 1; - } - } - // Put alternative with fewer args first. - if (f1.getNumArgs() < f2.getNumArgs()) { - return -1; - } else if (f1.getNumArgs() > f2.getNumArgs()) { - return 1; - } - return 0; - } - - private int typeCompare(Type t1, Type t2) { - Preconditions.checkState(!t1.isComplexType()); - Preconditions.checkState(!t2.isComplexType()); - return Integer.compare(t1.getPrimitiveType().ordinal(), - t2.getPrimitiveType().ordinal()); - } - } - - private static final FunctionResolutionOrder FUNCTION_RESOLUTION_ORDER = - new FunctionResolutionOrder(); - - /** - * Returns the metastore.api.Database object this Database was created from. - * Returns null if it is not related to a hive database such as builtins_db. - */ - public org.apache.hadoop.hive.metastore.api.Database getMetaStoreDb() { - return thriftDb_.getMetastore_db(); - } - - /** - * Returns the number of functions in this database. - */ - public int numFunctions() { - synchronized (functions_) { - return functions_.size(); - } - } - - /** - * See comment in Catalog. - */ - public boolean containsFunction(String name) { - synchronized (functions_) { - return functions_.get(name) != null; - } - } - - /* - * See comment in Catalog. - */ - public Function getFunction(Function desc, Function.CompareMode mode) { - synchronized (functions_) { - List<Function> fns = functions_.get(desc.functionName()); - if (fns == null) return null; - - // First check for identical - for (Function f: fns) { - if (f.compare(desc, Function.CompareMode.IS_IDENTICAL)) return f; - } - if (mode == Function.CompareMode.IS_IDENTICAL) return null; - - // Next check for indistinguishable - for (Function f: fns) { - if (f.compare(desc, Function.CompareMode.IS_INDISTINGUISHABLE)) return f; - } - if (mode == Function.CompareMode.IS_INDISTINGUISHABLE) return null; - - // Next check for strict supertypes - for (Function f: fns) { - if (f.compare(desc, Function.CompareMode.IS_SUPERTYPE_OF)) return f; - } - if (mode == Function.CompareMode.IS_SUPERTYPE_OF) return null; - - // Finally check for non-strict supertypes - for (Function f: fns) { - if (f.compare(desc, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF)) return f; - } - } - return null; - } - - public Function getFunction(String signatureString) { - synchronized (functions_) { - for (List<Function> fns: functions_.values()) { - for (Function f: fns) { - if (f.signatureString().equals(signatureString)) return f; - } - } - } - return null; - } - - /** - * Adds the user defined function fn to metastore DB params. 
fn is - * serialized to thrift using TBinaryProtocol and then base64-encoded - * to be compatible with the HMS' representation of params. - */ - private boolean addFunctionToDbParams(Function fn) { - Preconditions.checkState( - fn.getBinaryType() != TFunctionBinaryType.BUILTIN && - fn.getBinaryType() != TFunctionBinaryType.JAVA); - try { - TSerializer serializer = - new TSerializer(new TCompactProtocol.Factory()); - byte[] serializedFn = serializer.serialize(fn.toThrift()); - String base64Fn = Base64.encodeBase64String(serializedFn); - String fnKey = FUNCTION_INDEX_PREFIX + fn.signatureString(); - if (base64Fn.length() > HIVE_METASTORE_DB_PARAM_LIMIT_BYTES) { - throw new ImpalaRuntimeException( - "Serialized function size exceeded HMS 4K byte limit"); - } - putToHmsParameters(fnKey, base64Fn); - } catch (ImpalaException | TException e) { - LOG.error("Error adding function " + fn.getName() + " to DB params", e); - return false; - } - return true; - } - - public boolean addFunction(Function fn) { - // We use the db parameters map to persist native and IR functions. - boolean addToDbParams = - (fn.getBinaryType() == TFunctionBinaryType.NATIVE || - fn.getBinaryType() == TFunctionBinaryType.IR); - return addFunction(fn, addToDbParams); - } - - /** - * Registers the function fn to this database. If addToDbParams is true, - * fn is added to the metastore DB params. Returns false if the function - * fn already exists or when a failure is encountered while adding it to - * the metastore DB params and true otherwise. - */ - public boolean addFunction(Function fn, boolean addToDbParams) { - Preconditions.checkState(fn.dbName().equals(getName())); - synchronized (functions_) { - if (getFunction(fn, Function.CompareMode.IS_INDISTINGUISHABLE) != null) { - return false; - } - List<Function> fns = functions_.get(fn.functionName()); - if (fns == null) { - fns = Lists.newArrayList(); - functions_.put(fn.functionName(), fns); - } - if (addToDbParams && !addFunctionToDbParams(fn)) return false; - fns.add(fn); - Collections.sort(fns, FUNCTION_RESOLUTION_ORDER); - return true; - } - } - - /** - * See comment in Catalog. - */ - public Function removeFunction(Function desc) { - synchronized (functions_) { - Function fn = getFunction(desc, Function.CompareMode.IS_INDISTINGUISHABLE); - if (fn == null) return null; - List<Function> fns = functions_.get(desc.functionName()); - Preconditions.checkNotNull(fns); - fns.remove(fn); - if (fns.isEmpty()) functions_.remove(desc.functionName()); - if (fn.getBinaryType() == TFunctionBinaryType.JAVA) return fn; - // Remove the function from the metastore database parameters - String fnKey = FUNCTION_INDEX_PREFIX + fn.signatureString(); - boolean removeFn = removeFromHmsParameters(fnKey); - Preconditions.checkState(removeFn); - return fn; - } - } - - /** - * Removes a Function with the matching signature string. Returns the removed Function - * if a Function was removed as a result of this call, null otherwise. - * TODO: Move away from using signature strings and instead use Function IDs. - */ - public Function removeFunction(String signatureStr) { - synchronized (functions_) { - Function targetFn = getFunction(signatureStr); - if (targetFn != null) return removeFunction(targetFn); - } - return null; - } - - /** - * Add a builtin with the specified name and signatures to this db. - * This defaults to not using a Prepare/Close function. - */ - public void addScalarBuiltin(String fnName, String symbol, boolean userVisible, - boolean varArgs, Type retType, Type ... 
args) { - addScalarBuiltin(fnName, symbol, userVisible, null, null, varArgs, retType, args); - } - - /** - * Add a builtin with the specified name and signatures to this db. - */ - public void addScalarBuiltin(String fnName, String symbol, boolean userVisible, - String prepareFnSymbol, String closeFnSymbol, boolean varArgs, Type retType, - Type ... args) { - Preconditions.checkState(isSystemDb()); - addBuiltin(ScalarFunction.createBuiltin( - fnName, Lists.newArrayList(args), varArgs, retType, - symbol, prepareFnSymbol, closeFnSymbol, userVisible)); - } - - /** - * Adds a builtin to this database. The function must not already exist. - */ - public void addBuiltin(Function fn) { - Preconditions.checkState(isSystemDb()); - Preconditions.checkState(fn != null); - Preconditions.checkState(getFunction(fn, Function.CompareMode.IS_IDENTICAL) == null); - addFunction(fn, false); - } - - /** - * Returns a map of functionNames to list of (overloaded) functions with that name. - * This is not thread safe so a higher level lock must be taken while iterating - * over the returned functions. - */ - protected HashMap<String, List<Function>> getAllFunctions() { - return functions_; - } - - /** - * Returns a list of transient functions in this Db. - */ - protected List<Function> getTransientFunctions() { - List<Function> result = Lists.newArrayList(); - synchronized (functions_) { - for (String fnKey: functions_.keySet()) { - for (Function fn: functions_.get(fnKey)) { - if (fn.userVisible() && !fn.isPersistent()) { - result.add(fn); - } - } - } - } - return result; - } - - /** - * Returns all functions that match the pattern of 'matcher'. - */ - public List<Function> getFunctions(TFunctionCategory category, - PatternMatcher matcher) { - Preconditions.checkNotNull(matcher); - List<Function> result = Lists.newArrayList(); - synchronized (functions_) { - for (Map.Entry<String, List<Function>> fns: functions_.entrySet()) { - if (!matcher.matches(fns.getKey())) continue; - for (Function fn: fns.getValue()) { - if ((category == null || Function.categoryMatch(fn, category)) - && fn.userVisible()) { - result.add(fn); - } - } - } - } - return result; - } - - /** - * Returns all functions with the given name - */ - public List<Function> getFunctions(String name) { - List<Function> result = Lists.newArrayList(); - Preconditions.checkNotNull(name); - synchronized (functions_) { - if (!functions_.containsKey(name)) return result; - for (Function fn: functions_.get(name)) { - if (fn.userVisible()) result.add(fn); - } - } - return result; - } - - /** - * Returns all functions with the given name and category. 
- */ - public List<Function> getFunctions(TFunctionCategory category, String name) { - List<Function> result = Lists.newArrayList(); - Preconditions.checkNotNull(category); - Preconditions.checkNotNull(name); - synchronized (functions_) { - if (!functions_.containsKey(name)) return result; - for (Function fn: functions_.get(name)) { - if (fn.userVisible() && Function.categoryMatch(fn, category)) { - result.add(fn); - } - } - } - return result; - } - - @Override - public long getCatalogVersion() { return catalogVersion_; } - @Override - public void setCatalogVersion(long newVersion) { catalogVersion_ = newVersion; } - public Catalog getParentCatalog() { return parentCatalog_; } - - @Override - public boolean isLoaded() { return true; } -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/Function.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/Function.java b/fe/src/main/java/com/cloudera/impala/catalog/Function.java deleted file mode 100644 index 406e958..0000000 --- a/fe/src/main/java/com/cloudera/impala/catalog/Function.java +++ /dev/null @@ -1,488 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.catalog; - -import java.util.List; - -import com.cloudera.impala.analysis.FunctionName; -import com.cloudera.impala.analysis.HdfsUri; -import com.cloudera.impala.common.AnalysisException; -import com.cloudera.impala.common.InternalException; -import com.cloudera.impala.service.FeSupport; -import com.cloudera.impala.thrift.TAggregateFunction; -import com.cloudera.impala.thrift.TCatalogObjectType; -import com.cloudera.impala.thrift.TColumnType; -import com.cloudera.impala.thrift.TFunction; -import com.cloudera.impala.thrift.TFunctionBinaryType; -import com.cloudera.impala.thrift.TFunctionCategory; -import com.cloudera.impala.thrift.TScalarFunction; -import com.cloudera.impala.thrift.TSymbolLookupParams; -import com.cloudera.impala.thrift.TSymbolLookupResult; -import com.cloudera.impala.thrift.TSymbolType; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; - - -/** - * Base class for all functions. - * Each function can be of the following 4 types. - * - Native/IR stored in db params (persisted, visible to Impala) - * - Hive UDFs stored in the HMS (visible to Hive + Impala) - * - Java UDFs which are not persisted (visible to Impala but not Hive) - * - Builtin functions, which are recreated after every restart of the - * catalog. (persisted, visible to Impala) - */ -public class Function implements CatalogObject { - // Enum for how to compare function signatures. 
- // For decimal types, the type in the function can be a wildcard, i.e. decimal(*,*). - // The wildcard can *only* exist as function type, the caller will always be a - // fully specified decimal. - // For the purposes of function type resolution, decimal(*,*) will match exactly - // with any fully specified decimal (i.e. fn(decimal(*,*)) matches identically for - // the call to fn(decimal(1,0)). - public enum CompareMode { - // Two signatures are identical if the number of arguments and their types match - // exactly and either both signatures are varargs or neither. - IS_IDENTICAL, - - // Two signatures are indistinguishable if there is no way to tell them apart - // when matching a particular instantiation. That is, their fixed arguments - // match exactly and the remaining varargs have the same type. - // e.g. fn(int, int, int) and fn(int...) - // Argument types that are NULL are ignored when doing this comparison. - // e.g. fn(NULL, int) is indistinguishable from fn(int, int) - IS_INDISTINGUISHABLE, - - // X is a supertype of Y if Y.arg[i] can be strictly implicitly cast to X.arg[i]. If - /// X has vargs, the remaining arguments of Y must be strictly implicitly castable - // to the var arg type. The key property this provides is that X can be used in place - // of Y. e.g. fn(int, double, string...) is a supertype of fn(tinyint, float, string, - // string) - IS_SUPERTYPE_OF, - - // Nonstrict supertypes broaden the definition of supertype to accept implicit casts - // of arguments that may result in loss of precision - e.g. decimal to float. - IS_NONSTRICT_SUPERTYPE_OF, - } - - // User specified function name e.g. "Add" - private FunctionName name_; - - private final Type retType_; - // Array of parameter types. empty array if this function does not have parameters. - private Type[] argTypes_; - - // If true, this function has variable arguments. - // TODO: we don't currently support varargs with no fixed types. i.e. fn(...) - private boolean hasVarArgs_; - - // If true (default), this function is called directly by the user. For operators, - // this is false. If false, it also means the function is not visible from - // 'show functions'. - private boolean userVisible_; - - // Absolute path in HDFS for the binary that contains this function. - // e.g. /udfs/udfs.jar - private HdfsUri location_; - private TFunctionBinaryType binaryType_; - - // Set to true for functions that survive service restarts, including all builtins, - // native and IR functions, but only Java functions created without a signature. - private boolean isPersistent_; - private long catalogVersion_ = Catalog.INITIAL_CATALOG_VERSION; - - public Function(FunctionName name, Type[] argTypes, - Type retType, boolean varArgs) { - this.name_ = name; - this.hasVarArgs_ = varArgs; - if (argTypes == null) { - argTypes_ = new Type[0]; - } else { - this.argTypes_ = argTypes; - } - if (retType == null) { - this.retType_ = ScalarType.INVALID; - } else { - this.retType_ = retType; - } - this.userVisible_ = true; - } - - public Function(FunctionName name, List<Type> args, - Type retType, boolean varArgs) { - this(name, (Type[])null, retType, varArgs); - if (args != null && args.size() > 0) { - argTypes_ = args.toArray(new Type[args.size()]); - } else { - argTypes_ = new Type[0]; - } - } - - /** - * Static helper method to create a function with a given TFunctionBinaryType. 
- */ - public static Function createFunction(String db, String fnName, List<Type> args, - Type retType, boolean varArgs, TFunctionBinaryType fnType) { - Function fn = - new Function(new FunctionName(db, fnName), args, retType, varArgs); - fn.setBinaryType(fnType); - return fn; - } - - public FunctionName getFunctionName() { return name_; } - public String functionName() { return name_.getFunction(); } - public String dbName() { return name_.getDb(); } - public Type getReturnType() { return retType_; } - public Type[] getArgs() { return argTypes_; } - // Returns the number of arguments to this function. - public int getNumArgs() { return argTypes_.length; } - public HdfsUri getLocation() { return location_; } - public TFunctionBinaryType getBinaryType() { return binaryType_; } - public boolean hasVarArgs() { return hasVarArgs_; } - public boolean isPersistent() { return isPersistent_; } - public boolean userVisible() { return userVisible_; } - public Type getVarArgsType() { - if (!hasVarArgs_) return Type.INVALID; - Preconditions.checkState(argTypes_.length > 0); - return argTypes_[argTypes_.length - 1]; - } - - public void setName(FunctionName name) { name_ = name; } - public void setLocation(HdfsUri loc) { location_ = loc; } - public void setBinaryType(TFunctionBinaryType type) { binaryType_ = type; } - public void setHasVarArgs(boolean v) { hasVarArgs_ = v; } - public void setIsPersistent(boolean v) { isPersistent_ = v; } - public void setUserVisible(boolean b) { userVisible_ = b; } - - // Returns a string with the signature in human readable format: - // FnName(argtype1, argtyp2). e.g. Add(int, int) - public String signatureString() { - StringBuilder sb = new StringBuilder(); - sb.append(name_.getFunction()) - .append("(") - .append(Joiner.on(", ").join(argTypes_)); - if (hasVarArgs_) sb.append("..."); - sb.append(")"); - return sb.toString(); - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof Function)) return false; - return compare((Function)o, CompareMode.IS_IDENTICAL); - } - - // Compares this to 'other' for mode. - public boolean compare(Function other, CompareMode mode) { - switch (mode) { - case IS_IDENTICAL: return isIdentical(other); - case IS_INDISTINGUISHABLE: return isIndistinguishable(other); - case IS_SUPERTYPE_OF: return isSuperTypeOf(other, true); - case IS_NONSTRICT_SUPERTYPE_OF: return isSuperTypeOf(other, false); - default: - Preconditions.checkState(false); - return false; - } - } - /** - * Returns true if 'this' is a supertype of 'other'. Each argument in other must - * be implicitly castable to the matching argument in this. If strict is true, - * only consider conversions where there is no loss of precision. - */ - private boolean isSuperTypeOf(Function other, boolean strict) { - if (!other.name_.equals(name_)) return false; - if (!this.hasVarArgs_ && other.argTypes_.length != this.argTypes_.length) { - return false; - } - if (this.hasVarArgs_ && other.argTypes_.length < this.argTypes_.length) return false; - for (int i = 0; i < this.argTypes_.length; ++i) { - if (!Type.isImplicitlyCastable(other.argTypes_[i], this.argTypes_[i], strict)) { - return false; - } - } - // Check trailing varargs. 
- if (this.hasVarArgs_) { - for (int i = this.argTypes_.length; i < other.argTypes_.length; ++i) { - if (other.argTypes_[i].matchesType(this.getVarArgsType())) continue; - if (!Type.isImplicitlyCastable(other.argTypes_[i], this.getVarArgsType(), - strict)) { - return false; - } - } - } - return true; - } - - /** - * Converts any CHAR arguments to be STRING arguments - */ - public Function promoteCharsToStrings() { - Type[] promoted = argTypes_.clone(); - for (int i = 0; i < promoted.length; ++i) { - if (promoted[i].isScalarType(PrimitiveType.CHAR)) promoted[i] = ScalarType.STRING; - } - return new Function(name_, promoted, retType_, hasVarArgs_); - } - - /** - * Given a list of functions which are a super type of this function, select the best - * match. This is the one which requires the fewest type promotions. - */ - public Function selectClosestSuperType(List<Function> candidates) { - Preconditions.checkArgument(candidates.size() > 0); - if (candidates.size() == 1) return candidates.get(0); - - // Always promote CHAR to STRING before attempting any other promotions. - Function withStrs = promoteCharsToStrings(); - for (Function f: candidates) { - if (withStrs.isIndistinguishable(f)) return f; - } - // Otherwise, we use the previous rules of resolution which are to take the first - // one in the list. - return candidates.get(0); - } - - private boolean isIdentical(Function o) { - if (!o.name_.equals(name_)) return false; - if (o.argTypes_.length != this.argTypes_.length) return false; - if (o.hasVarArgs_ != this.hasVarArgs_) return false; - for (int i = 0; i < this.argTypes_.length; ++i) { - if (!o.argTypes_[i].matchesType(this.argTypes_[i])) return false; - } - return true; - } - - private boolean isIndistinguishable(Function o) { - if (!o.name_.equals(name_)) return false; - int minArgs = Math.min(o.argTypes_.length, this.argTypes_.length); - // The first fully specified args must be identical. 
- for (int i = 0; i < minArgs; ++i) { - if (o.argTypes_[i].isNull() || this.argTypes_[i].isNull()) continue; - if (!o.argTypes_[i].matchesType(this.argTypes_[i])) return false; - } - if (o.argTypes_.length == this.argTypes_.length) return true; - - if (o.hasVarArgs_ && this.hasVarArgs_) { - if (!o.getVarArgsType().matchesType(this.getVarArgsType())) return false; - if (this.getNumArgs() > o.getNumArgs()) { - for (int i = minArgs; i < this.getNumArgs(); ++i) { - if (this.argTypes_[i].isNull()) continue; - if (!this.argTypes_[i].matchesType(o.getVarArgsType())) return false; - } - } else { - for (int i = minArgs; i < o.getNumArgs(); ++i) { - if (o.argTypes_[i].isNull()) continue; - if (!o.argTypes_[i].matchesType(this.getVarArgsType())) return false; - } - } - return true; - } else if (o.hasVarArgs_) { - // o has var args so check the remaining arguments from this - if (o.getNumArgs() > minArgs) return false; - for (int i = minArgs; i < this.getNumArgs(); ++i) { - if (this.argTypes_[i].isNull()) continue; - if (!this.argTypes_[i].matchesType(o.getVarArgsType())) return false; - } - return true; - } else if (this.hasVarArgs_) { - // this has var args so check the remaining arguments from s - if (this.getNumArgs() > minArgs) return false; - for (int i = minArgs; i < o.getNumArgs(); ++i) { - if (o.argTypes_[i].isNull()) continue; - if (!o.argTypes_[i].matchesType(this.getVarArgsType())) return false; - } - return true; - } else { - // Neither has var args and the lengths don't match - return false; - } - } - - @Override - public TCatalogObjectType getCatalogObjectType() { return TCatalogObjectType.FUNCTION; } - - @Override - public long getCatalogVersion() { return catalogVersion_; } - - @Override - public void setCatalogVersion(long newVersion) { catalogVersion_ = newVersion; } - - @Override - public String getName() { return getFunctionName().toString(); } - - // Child classes must override this function. - public String toSql(boolean ifNotExists) { return ""; } - - public TFunction toThrift() { - TFunction fn = new TFunction(); - fn.setSignature(signatureString()); - fn.setName(name_.toThrift()); - fn.setBinary_type(binaryType_); - if (location_ != null) fn.setHdfs_location(location_.toString()); - fn.setArg_types(Type.toThrift(argTypes_)); - fn.setRet_type(getReturnType().toThrift()); - fn.setHas_var_args(hasVarArgs_); - fn.setIs_persistent(isPersistent_); - // TODO: Comment field is missing? 
- // fn.setComment(comment_) - return fn; - } - - public static Function fromThrift(TFunction fn) { - List<Type> argTypes = Lists.newArrayList(); - for (TColumnType t: fn.getArg_types()) { - argTypes.add(Type.fromThrift(t)); - } - - Function function = null; - if (fn.isSetScalar_fn()) { - TScalarFunction scalarFn = fn.getScalar_fn(); - function = new ScalarFunction(FunctionName.fromThrift(fn.getName()), argTypes, - Type.fromThrift(fn.getRet_type()), new HdfsUri(fn.getHdfs_location()), - scalarFn.getSymbol(), scalarFn.getPrepare_fn_symbol(), - scalarFn.getClose_fn_symbol()); - } else if (fn.isSetAggregate_fn()) { - TAggregateFunction aggFn = fn.getAggregate_fn(); - function = new AggregateFunction(FunctionName.fromThrift(fn.getName()), argTypes, - Type.fromThrift(fn.getRet_type()), - Type.fromThrift(aggFn.getIntermediate_type()), - new HdfsUri(fn.getHdfs_location()), aggFn.getUpdate_fn_symbol(), - aggFn.getInit_fn_symbol(), aggFn.getSerialize_fn_symbol(), - aggFn.getMerge_fn_symbol(), aggFn.getGet_value_fn_symbol(), - null, aggFn.getFinalize_fn_symbol()); - } else { - // In the case where we are trying to look up the object, we only have the - // signature. - function = new Function(FunctionName.fromThrift(fn.getName()), - argTypes, Type.fromThrift(fn.getRet_type()), fn.isHas_var_args()); - } - function.setBinaryType(fn.getBinary_type()); - function.setHasVarArgs(fn.isHas_var_args()); - if (fn.isSetIs_persistent()) { - function.setIsPersistent(fn.isIs_persistent()); - } else { - function.setIsPersistent(false); - } - return function; - } - - @Override - public boolean isLoaded() { return true; } - - // Returns the resolved symbol in the binary. The BE will do a lookup of 'symbol' - // in the binary and try to resolve unmangled names. - // If this function is expecting a return argument, retArgType is that type. It should - // be null if this function isn't expecting a return argument. - public String lookupSymbol(String symbol, TSymbolType symbolType, Type retArgType, - boolean hasVarArgs, Type... argTypes) throws AnalysisException { - if (symbol.length() == 0) { - if (binaryType_ == TFunctionBinaryType.BUILTIN) { - // We allow empty builtin symbols in order to stage work in the FE before its - // implemented in the BE - return symbol; - } - throw new AnalysisException("Could not find symbol ''"); - } - - TSymbolLookupParams lookup = new TSymbolLookupParams(); - // Builtin functions do not have an external library, they are loaded directly from - // the running process - lookup.location = binaryType_ != TFunctionBinaryType.BUILTIN ? - location_.toString() : ""; - lookup.symbol = symbol; - lookup.symbol_type = symbolType; - lookup.fn_binary_type = binaryType_; - lookup.arg_types = Type.toThrift(argTypes); - lookup.has_var_args = hasVarArgs; - if (retArgType != null) lookup.setRet_arg_type(retArgType.toThrift()); - - try { - TSymbolLookupResult result = FeSupport.LookupSymbol(lookup); - switch (result.result_code) { - case SYMBOL_FOUND: - return result.symbol; - case BINARY_NOT_FOUND: - Preconditions.checkState(binaryType_ != TFunctionBinaryType.BUILTIN); - throw new AnalysisException( - "Could not load binary: " + location_.getLocation() + "\n" + - result.error_msg); - case SYMBOL_NOT_FOUND: - throw new AnalysisException(result.error_msg); - default: - // Should never get here. - throw new AnalysisException("Internal Error"); - } - } catch (InternalException e) { - // Should never get here. 
- e.printStackTrace(); - throw new AnalysisException("Could not find symbol: " + symbol, e); - } - } - - public String lookupSymbol(String symbol, TSymbolType symbolType) - throws AnalysisException { - Preconditions.checkState( - symbolType == TSymbolType.UDF_PREPARE || symbolType == TSymbolType.UDF_CLOSE); - return lookupSymbol(symbol, symbolType, null, false); - } - - public static String getUdfType(Type t) { - switch (t.getPrimitiveType()) { - case BOOLEAN: - return "BooleanVal"; - case TINYINT: - return "TinyIntVal"; - case SMALLINT: - return "SmallIntVal"; - case INT: - return "IntVal"; - case BIGINT: - return "BigIntVal"; - case FLOAT: - return "FloatVal"; - case DOUBLE: - return "DoubleVal"; - case STRING: - case VARCHAR: - case CHAR: - return "StringVal"; - case TIMESTAMP: - return "TimestampVal"; - case DECIMAL: - return "DecimalVal"; - default: - Preconditions.checkState(false, t.toString()); - return ""; - } - } - - /** - * Returns true if the given function matches the specified category. - */ - public static boolean categoryMatch(Function fn, TFunctionCategory category) { - Preconditions.checkNotNull(category); - return (category == TFunctionCategory.SCALAR && fn instanceof ScalarFunction) - || (category == TFunctionCategory.AGGREGATE - && fn instanceof AggregateFunction - && ((AggregateFunction)fn).isAggregateFn()) - || (category == TFunctionCategory.ANALYTIC - && fn instanceof AggregateFunction - && ((AggregateFunction)fn).isAnalyticFn()); - } -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/HBaseColumn.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HBaseColumn.java b/fe/src/main/java/com/cloudera/impala/catalog/HBaseColumn.java deleted file mode 100644 index 37fa853..0000000 --- a/fe/src/main/java/com/cloudera/impala/catalog/HBaseColumn.java +++ /dev/null @@ -1,67 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.catalog; - -import com.cloudera.impala.thrift.TColumn; - -// Describes an HBase column mapped to a Hive column (as described in the metastore). -// this.name describes the column name in Hive. -// This class adds the HBase columnFamily and columnQualifier, -// so we can read the column from HBase directly. 
-public class HBaseColumn extends Column implements Comparable<HBaseColumn> { - private final String columnFamily_; - private final String columnQualifier_; - private final boolean binaryEncoded_; - - public HBaseColumn(String name, String columnFamily, String columnQualifier, - boolean binaryEncoded, Type type, String comment, int position) { - super(name, type, comment, position); - columnFamily_ = columnFamily; - columnQualifier_ = columnQualifier; - binaryEncoded_ = binaryEncoded; - } - - public String getColumnFamily() { return columnFamily_; } - public String getColumnQualifier() { return columnQualifier_; } - public boolean isBinaryEncoded() { return binaryEncoded_; } - - @Override - // We order the HBase columns in the matadata based on columnFamily,columnQualifier, - // to more easily map slots from HBase's Result.raw() to target slots in the backend. - public int compareTo(HBaseColumn o) { - int familyCmp = columnFamily_.compareTo(o.columnFamily_); - if (familyCmp != 0) { - return familyCmp; - } - int qualifierCmp = columnQualifier_.compareTo(o.columnQualifier_); - return qualifierCmp; - } - - @Override - public TColumn toThrift() { - TColumn colDesc = new TColumn(name_, type_.toThrift()); - if (comment_ != null) colDesc.setComment(comment_); - colDesc.setCol_stats(getStats().toThrift()); - colDesc.setPosition(position_); - colDesc.setIs_hbase_column(true); - colDesc.setColumn_family(columnFamily_); - colDesc.setColumn_qualifier(columnQualifier_); - colDesc.setIs_binary(binaryEncoded_); - return colDesc; - } -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java b/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java deleted file mode 100644 index d96314e..0000000 --- a/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java +++ /dev/null @@ -1,853 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package com.cloudera.impala.catalog; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.ClusterStatus; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.HColumnDescriptor; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HRegionLocation; -import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.ServerLoad; -import org.apache.hadoop.hbase.ServerName; -import org.apache.hadoop.hbase.RegionLoad; -import org.apache.hadoop.hbase.client.Admin; -import org.apache.hadoop.hbase.client.Connection; -import org.apache.hadoop.hbase.client.ConnectionFactory; -import org.apache.hadoop.hbase.client.RegionLocator; -import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.client.ResultScanner; -import org.apache.hadoop.hbase.client.Scan; -import org.apache.hadoop.hbase.io.compress.Compression; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hive.hbase.HBaseSerDe; -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.log4j.Logger; - -import com.cloudera.impala.common.Pair; -import com.cloudera.impala.thrift.TCatalogObjectType; -import com.cloudera.impala.thrift.TColumn; -import com.cloudera.impala.thrift.THBaseTable; -import com.cloudera.impala.thrift.TResultSet; -import com.cloudera.impala.thrift.TResultSetMetadata; -import com.cloudera.impala.thrift.TTable; -import com.cloudera.impala.thrift.TTableDescriptor; -import com.cloudera.impala.thrift.TTableType; -import com.cloudera.impala.util.StatsHelper; -import com.cloudera.impala.util.TResultRowBuilder; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; - -/** - * Impala representation of HBase table metadata, - * as loaded from Hive's metastore. - * This implies that we inherit the metastore's limitations related to HBase, - * for example the lack of support for composite HBase row keys. - * We sort the HBase columns (cols) by family/qualifier - * to simplify the retrieval logic in the backend, since - * HBase returns data ordered by family/qualifier. - * This implies that a "select *"-query on an HBase table - * will not have the columns ordered as they were declared in the DDL. - * They will be ordered by family/qualifier. - * - */ -public class HBaseTable extends Table { - // Maximum deviation from the average to stop querying more regions - // to estimate the row count - private static final double DELTA_FROM_AVERAGE = 0.15; - - private static final Logger LOG = Logger.getLogger(HBaseTable.class); - - // Copied from Hive's HBaseStorageHandler.java. 
- public static final String DEFAULT_PREFIX = "default."; - - // Number of rows fetched during the row count estimation per region - public static final int ROW_COUNT_ESTIMATE_BATCH_SIZE = 10; - - // Minimum number of regions that are checked to estimate the row count - private static final int MIN_NUM_REGIONS_TO_CHECK = 5; - - // Column referring to HBase row key. - // Hive (including metastore) currently doesn't support composite HBase keys. - protected HBaseColumn rowKey_; - - // Name of table in HBase. - // 'this.name' is the alias of the HBase table in Hive. - protected String hbaseTableName_; - - // Input format class for HBase tables read by Hive. - private static final String HBASE_INPUT_FORMAT = - "org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat"; - - // Serialization class for HBase tables set in the corresponding Metastore table. - private static final String HBASE_SERIALIZATION_LIB = - "org.apache.hadoop.hive.hbase.HBaseSerDe"; - - // Storage handler class for HBase tables read by Hive. - private static final String HBASE_STORAGE_HANDLER = - "org.apache.hadoop.hive.hbase.HBaseStorageHandler"; - - // Column family of HBase row key - private static final String ROW_KEY_COLUMN_FAMILY = ":key"; - - // Keep the conf around - private final static Configuration hbaseConf_ = HBaseConfiguration.create(); - - // Cached column families. Used primarily for speeding up row stats estimation - // (see CDH-19292). - private HColumnDescriptor[] columnFamilies_ = null; - - protected HBaseTable(TableId id, org.apache.hadoop.hive.metastore.api.Table msTbl, - Db db, String name, String owner) { - super(id, msTbl, db, name, owner); - } - - /** - * Connection instances are expensive to create. The HBase documentation recommends - * one and then sharing it among threads. All operations on a connection are - * thread-safe. - */ - private static class ConnectionHolder { - private static Connection connection_ = null; - - public static synchronized Connection getConnection(Configuration conf) - throws IOException { - if (connection_ == null || connection_.isClosed()) { - connection_ = ConnectionFactory.createConnection(conf); - } - return connection_; - } - } - - /** - * Table client objects are thread-unsafe and cheap to create. The HBase docs recommend - * creating a new one for each task and then closing when done. - */ - public org.apache.hadoop.hbase.client.Table getHBaseTable() throws IOException { - return ConnectionHolder.getConnection(hbaseConf_) - .getTable(TableName.valueOf(hbaseTableName_)); - } - - private void closeHBaseTable(org.apache.hadoop.hbase.client.Table table) { - try { - table.close(); - } catch (IOException e) { - LOG.error("Error closing HBase table: " + hbaseTableName_, e); - } - } - - /** - * Get the cluster status, making sure we close the admin client afterwards. - */ - public ClusterStatus getClusterStatus() throws IOException { - Admin admin = null; - ClusterStatus clusterStatus = null; - try { - Connection connection = ConnectionHolder.getConnection(hbaseConf_); - admin = connection.getAdmin(); - clusterStatus = admin.getClusterStatus(); - } finally { - if (admin != null) admin.close(); - } - return clusterStatus; - } - - /** - * Parse the column description string to the column families and column - * qualifies. This is a copy of HBaseSerDe.parseColumnMapping and - * parseColumnStorageTypes with parts we don't use removed. The hive functions - * are not public. 
- - * tableDefaultStorageIsBinary - true if table is default to binary encoding - * columnsMappingSpec - input string format describing the table - * fieldSchemas - input field schema from metastore table - * columnFamilies/columnQualifiers/columnBinaryEncodings - out parameters that will be - * filled with the column family, column qualifier and encoding for each column. - */ - private void parseColumnMapping(boolean tableDefaultStorageIsBinary, - String columnsMappingSpec, List<FieldSchema> fieldSchemas, - List<String> columnFamilies, List<String> columnQualifiers, - List<Boolean> colIsBinaryEncoded) throws SerDeException { - if (columnsMappingSpec == null) { - throw new SerDeException( - "Error: hbase.columns.mapping missing for this HBase table."); - } - - if (columnsMappingSpec.equals("") || - columnsMappingSpec.equals(HBaseSerDe.HBASE_KEY_COL)) { - throw new SerDeException("Error: hbase.columns.mapping specifies only " - + "the HBase table row key. A valid Hive-HBase table must specify at " - + "least one additional column."); - } - - int rowKeyIndex = -1; - String[] columnSpecs = columnsMappingSpec.split(","); - // If there was an implicit key column mapping, the number of columns (fieldSchemas) - // will be one more than the number of column mapping specs. - int fsStartIdxOffset = fieldSchemas.size() - columnSpecs.length; - if (fsStartIdxOffset != 0 && fsStartIdxOffset != 1) { - // This should never happen - Hive blocks creating a mismatched table and both Hive - // and Impala currently block all column-level DDL on HBase tables. - throw new SerDeException(String.format("Number of entries in " + - "'hbase.columns.mapping' does not match the number of columns in the " + - "table: %d != %d (counting the key if implicit)", - columnSpecs.length, fieldSchemas.size())); - } - - for (int i = 0; i < columnSpecs.length; ++i) { - String mappingSpec = columnSpecs[i]; - String[] mapInfo = mappingSpec.split("#"); - // Trim column info so that serdeproperties with new lines still parse correctly. - String colInfo = mapInfo[0].trim(); - - int idxFirst = colInfo.indexOf(":"); - int idxLast = colInfo.lastIndexOf(":"); - - if (idxFirst < 0 || !(idxFirst == idxLast)) { - throw new SerDeException("Error: the HBase columns mapping contains a " - + "badly formed column family, column qualifier specification."); - } - - if (colInfo.equals(HBaseSerDe.HBASE_KEY_COL)) { - Preconditions.checkState(fsStartIdxOffset == 0); - rowKeyIndex = i; - columnFamilies.add(colInfo); - columnQualifiers.add(null); - } else { - String[] parts = colInfo.split(":"); - Preconditions.checkState(parts.length > 0 && parts.length <= 2); - columnFamilies.add(parts[0]); - if (parts.length == 2) { - columnQualifiers.add(parts[1]); - } else { - columnQualifiers.add(null); - } - } - - // Set column binary encoding - FieldSchema fieldSchema = fieldSchemas.get(i + fsStartIdxOffset); - boolean supportsBinaryEncoding = supportsBinaryEncoding(fieldSchema); - if (mapInfo.length == 1) { - // There is no column level storage specification. Use the table storage spec. 
- colIsBinaryEncoded.add( - new Boolean(tableDefaultStorageIsBinary && supportsBinaryEncoding)); - } else if (mapInfo.length == 2) { - // There is a storage specification for the column - String storageOption = mapInfo[1]; - - if (!(storageOption.equals("-") || "string".startsWith(storageOption) || "binary" - .startsWith(storageOption))) { - throw new SerDeException("Error: A column storage specification is one of" - + " the following: '-', a prefix of 'string', or a prefix of 'binary'. " - + storageOption + " is not a valid storage option specification for " - + fieldSchema.getName()); - } - - boolean isBinaryEncoded = false; - if ("-".equals(storageOption)) { - isBinaryEncoded = tableDefaultStorageIsBinary; - } else if ("binary".startsWith(storageOption)) { - isBinaryEncoded = true; - } - if (isBinaryEncoded && !supportsBinaryEncoding) { - // Use string encoding and log a warning if the column spec is binary but the - // column type does not support it. - // TODO: Hive/HBase does not raise an exception, but should we? - LOG.warn("Column storage specification for column " + fieldSchema.getName() - + " is binary" + " but the column type " + fieldSchema.getType() + - " does not support binary encoding. Fallback to string format."); - isBinaryEncoded = false; - } - colIsBinaryEncoded.add(isBinaryEncoded); - } else { - // error in storage specification - throw new SerDeException("Error: " + HBaseSerDe.HBASE_COLUMNS_MAPPING - + " storage specification " + mappingSpec + " is not valid for column: " - + fieldSchema.getName()); - } - } - - if (rowKeyIndex == -1) { - columnFamilies.add(0, HBaseSerDe.HBASE_KEY_COL); - columnQualifiers.add(0, null); - colIsBinaryEncoded.add(0, - supportsBinaryEncoding(fieldSchemas.get(0)) && tableDefaultStorageIsBinary); - } - } - - private boolean supportsBinaryEncoding(FieldSchema fs) { - try { - Type colType = parseColumnType(fs); - // Only boolean, integer and floating point types can use binary storage. - return colType.isBoolean() || colType.isIntegerType() - || colType.isFloatingPointType(); - } catch (TableLoadingException e) { - return false; - } - } - - @Override - /** - * For hbase tables, we can support tables with columns we don't understand at - * all (e.g. map) as long as the user does not select those. This is in contrast - * to hdfs tables since we typically need to understand all columns to make sense - * of the file at all. - */ - public void load(boolean reuseMetadata, IMetaStoreClient client, - org.apache.hadoop.hive.metastore.api.Table msTbl) throws TableLoadingException { - Preconditions.checkNotNull(getMetaStoreTable()); - try { - msTable_ = msTbl; - hbaseTableName_ = getHBaseTableName(getMetaStoreTable()); - // Warm up the connection and verify the table exists. 
- getHBaseTable().close(); - columnFamilies_ = null; - Map<String, String> serdeParams = - getMetaStoreTable().getSd().getSerdeInfo().getParameters(); - String hbaseColumnsMapping = serdeParams.get(HBaseSerDe.HBASE_COLUMNS_MAPPING); - if (hbaseColumnsMapping == null) { - throw new MetaException("No hbase.columns.mapping defined in Serde."); - } - - String hbaseTableDefaultStorageType = getMetaStoreTable().getParameters().get( - HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE); - boolean tableDefaultStorageIsBinary = false; - if (hbaseTableDefaultStorageType != null && - !hbaseTableDefaultStorageType.isEmpty()) { - if (hbaseTableDefaultStorageType.equalsIgnoreCase("binary")) { - tableDefaultStorageIsBinary = true; - } else if (!hbaseTableDefaultStorageType.equalsIgnoreCase("string")) { - throw new SerDeException("Error: " + - HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE + - " parameter must be specified as" + - " 'string' or 'binary'; '" + hbaseTableDefaultStorageType + - "' is not a valid specification for this table/serde property."); - } - } - - // Parse HBase column-mapping string. - List<FieldSchema> fieldSchemas = getMetaStoreTable().getSd().getCols(); - List<String> hbaseColumnFamilies = new ArrayList<String>(); - List<String> hbaseColumnQualifiers = new ArrayList<String>(); - List<Boolean> hbaseColumnBinaryEncodings = new ArrayList<Boolean>(); - parseColumnMapping(tableDefaultStorageIsBinary, hbaseColumnsMapping, fieldSchemas, - hbaseColumnFamilies, hbaseColumnQualifiers, hbaseColumnBinaryEncodings); - Preconditions.checkState( - hbaseColumnFamilies.size() == hbaseColumnQualifiers.size()); - Preconditions.checkState(fieldSchemas.size() == hbaseColumnFamilies.size()); - - // Populate tmp cols in the order they appear in the Hive metastore. - // We will reorder the cols below. - List<HBaseColumn> tmpCols = Lists.newArrayList(); - // Store the key column separately. - // TODO: Change this to an ArrayList once we support composite row keys. - HBaseColumn keyCol = null; - for (int i = 0; i < fieldSchemas.size(); ++i) { - FieldSchema s = fieldSchemas.get(i); - Type t = Type.INVALID; - try { - t = parseColumnType(s); - } catch (TableLoadingException e) { - // Ignore hbase types we don't support yet. We can load the metadata - // but won't be able to select from it. - } - HBaseColumn col = new HBaseColumn(s.getName(), hbaseColumnFamilies.get(i), - hbaseColumnQualifiers.get(i), hbaseColumnBinaryEncodings.get(i), - t, s.getComment(), -1); - if (col.getColumnFamily().equals(ROW_KEY_COLUMN_FAMILY)) { - // Store the row key column separately from the rest - keyCol = col; - } else { - tmpCols.add(col); - } - } - Preconditions.checkState(keyCol != null); - - // The backend assumes that the row key column is always first and - // that the remaining HBase columns are ordered by columnFamily,columnQualifier, - // so the final position depends on the other mapped HBase columns. - // Sort columns and update positions. - Collections.sort(tmpCols); - clearColumns(); - - keyCol.setPosition(0); - addColumn(keyCol); - // Update the positions of the remaining columns - for (int i = 0; i < tmpCols.size(); ++i) { - HBaseColumn col = tmpCols.get(i); - col.setPosition(i + 1); - addColumn(col); - } - - // Set table stats. 
- numRows_ = getRowCount(super.getMetaStoreTable().getParameters()); - - // since we don't support composite hbase rowkeys yet, all hbase tables have a - // single clustering col - numClusteringCols_ = 1; - loadAllColumnStats(client); - } catch (Exception e) { - throw new TableLoadingException("Failed to load metadata for HBase table: " + - name_, e); - } - } - - @Override - protected void loadFromThrift(TTable table) throws TableLoadingException { - super.loadFromThrift(table); - try { - hbaseTableName_ = getHBaseTableName(getMetaStoreTable()); - // Warm up the connection and verify the table exists. - getHBaseTable().close(); - columnFamilies_ = null; - } catch (Exception e) { - throw new TableLoadingException("Failed to load metadata for HBase table from " + - "thrift table: " + name_, e); - } - } - - /** - * This method is completely copied from Hive's HBaseStorageHandler.java. - */ - private String getHBaseTableName(org.apache.hadoop.hive.metastore.api.Table tbl) { - // Give preference to TBLPROPERTIES over SERDEPROPERTIES - // (really we should only use TBLPROPERTIES, so this is just - // for backwards compatibility with the original specs). - String tableName = tbl.getParameters().get(HBaseSerDe.HBASE_TABLE_NAME); - if (tableName == null) { - tableName = tbl.getSd().getSerdeInfo().getParameters().get( - HBaseSerDe.HBASE_TABLE_NAME); - } - if (tableName == null) { - tableName = tbl.getDbName() + "." + tbl.getTableName(); - if (tableName.startsWith(DEFAULT_PREFIX)) { - tableName = tableName.substring(DEFAULT_PREFIX.length()); - } - } - return tableName; - } - - /** - * Estimates the number of rows for a single region and returns a pair with - * the estimated row count and the estimated size in bytes per row. - */ - private Pair<Long, Long> getEstimatedRowStatsForRegion(HRegionLocation location, - boolean isCompressed, ClusterStatus clusterStatus) throws IOException { - HRegionInfo info = location.getRegionInfo(); - - Scan s = new Scan(info.getStartKey()); - // Get a small sample of rows - s.setBatch(ROW_COUNT_ESTIMATE_BATCH_SIZE); - // Try and get every version so the row's size can be used to estimate. - s.setMaxVersions(Short.MAX_VALUE); - // Don't cache the blocks as we don't think these are - // necessarily important blocks. - s.setCacheBlocks(false); - // Try and get deletes too so their size can be counted. - s.setRaw(false); - - org.apache.hadoop.hbase.client.Table table = getHBaseTable(); - ResultScanner rs = table.getScanner(s); - - long currentRowSize = 0; - long currentRowCount = 0; - - try { - // Get the the ROW_COUNT_ESTIMATE_BATCH_SIZE fetched rows - // for a representative sample - for (int i = 0; i < ROW_COUNT_ESTIMATE_BATCH_SIZE; ++i) { - Result r = rs.next(); - if (r == null) - break; - // Check for empty rows, see IMPALA-1451 - if (r.isEmpty()) - continue; - ++currentRowCount; - // To estimate the number of rows we simply use the amount of bytes - // returned from the underlying buffer. Since HBase internally works - // with these structures as well this gives us ok estimates. - Cell[] cells = r.rawCells(); - for (Cell c : cells) { - if (c instanceof KeyValue) { - currentRowSize += KeyValue.getKeyValueDataStructureSize(c.getRowLength(), - c.getFamilyLength(), c.getQualifierLength(), c.getValueLength(), - c.getTagsLength()); - } else { - throw new IllegalStateException("Celltype " + c.getClass().getName() + - " not supported."); - } - } - } - } finally { - rs.close(); - closeHBaseTable(table); - } - - // If there are no rows then no need to estimate. 
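The getHBaseTableName() logic earlier in this hunk resolves the backing HBase table name with a simple precedence: the property from TBLPROPERTIES wins, then the same property from SERDEPROPERTIES, then dbName.tableName with any leading default prefix removed. A condensed sketch using plain maps; the literal key "hbase.table.name" and the "default." prefix are assumptions about the values of HBaseSerDe.HBASE_TABLE_NAME and DEFAULT_PREFIX.

import java.util.Map;

class HBaseTableNameResolver {
  static final String HBASE_TABLE_NAME = "hbase.table.name";  // assumed constant value
  static final String DEFAULT_PREFIX = "default.";            // assumed constant value

  static String resolve(Map<String, String> tblProps, Map<String, String> serdeProps,
      String dbName, String tableName) {
    String name = tblProps.get(HBASE_TABLE_NAME);               // TBLPROPERTIES wins
    if (name == null) name = serdeProps.get(HBASE_TABLE_NAME);  // then SERDEPROPERTIES
    if (name == null) {
      name = dbName + "." + tableName;                          // finally db.table
      if (name.startsWith(DEFAULT_PREFIX)) {
        name = name.substring(DEFAULT_PREFIX.length());         // "default.t1" -> "t1"
      }
    }
    return name;
  }
}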
- if (currentRowCount == 0) return new Pair<Long, Long>(0L, 0L); - // Get the size. - long currentSize = getRegionSize(location, clusterStatus); - // estimate the number of rows. - double bytesPerRow = currentRowSize / (double) currentRowCount; - if (currentSize == 0) { - return new Pair<Long, Long>(currentRowCount, (long) bytesPerRow); - } - - // Compression factor two is only a best effort guess - long estimatedRowCount = - (long) ((isCompressed ? 2 : 1) * (currentSize / bytesPerRow)); - - return new Pair<Long, Long>(estimatedRowCount, (long) bytesPerRow); - } - - /** - * Get an estimate of the number of rows and bytes per row in regions between - * startRowKey and endRowKey. - * - * This number is calculated by incrementally checking as many region servers as - * necessary until we observe a relatively constant row size per region on average. - * Depending on the skew of data in the regions this can either mean that we need - * to check only a minimal number of regions or that we will scan all regions. - * - * The HBase region servers periodically update the master with their metrics, - * including storefile size. We get the size of the storefiles for all regions in - * the cluster with a single call to getClusterStatus from the master. - * - * The accuracy of this number is determined by the number of rows that are written - * and kept in the memstore and have not been flushed until now. A large number - * of key-value pairs in the memstore will lead to bad estimates as this number - * is not reflected in the storefile size that is used to estimate this number. - * - * Currently, the algorithm does not consider the case that the key range used as a - * parameter might be generally of different size than the rest of the region. - * - * The values computed here should be cached so that in high qps workloads - * the nn is not overwhelmed. Could be done in load(); Synchronized to make - * sure that only one thread at a time is using the htable. - * - * @param startRowKey - * First row key in the range - * @param endRowKey - * Last row key in the range - * @return The estimated number of rows in the regions between the row keys (first) and - * the estimated row size in bytes (second). - */ - public synchronized Pair<Long, Long> getEstimatedRowStats(byte[] startRowKey, - byte[] endRowKey) { - Preconditions.checkNotNull(startRowKey); - Preconditions.checkNotNull(endRowKey); - - boolean isCompressed = false; - long rowCount = 0; - long rowSize = 0; - - org.apache.hadoop.hbase.client.Table table = null; - try { - table = getHBaseTable(); - ClusterStatus clusterStatus = getClusterStatus(); - - // Check to see if things are compressed. - // If they are we'll estimate a compression factor. - if (columnFamilies_ == null) { - columnFamilies_ = table.getTableDescriptor().getColumnFamilies(); - } - Preconditions.checkNotNull(columnFamilies_); - for (HColumnDescriptor desc : columnFamilies_) { - isCompressed |= desc.getCompression() != Compression.Algorithm.NONE; - } - - // Fetch all regions for the key range - List<HRegionLocation> locations = getRegionsInRange(table, startRowKey, endRowKey); - Collections.shuffle(locations); - // The following variables track the number and size of 'rows' in - // HBase and allow incremental calculation of the average and standard - // deviation. - StatsHelper<Long> statsSize = new StatsHelper<Long>(); - long totalEstimatedRows = 0; - - // Collects stats samples from at least MIN_NUM_REGIONS_TO_CHECK - // and at most all regions until the delta is small enough. 
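The sampling loop that follows keeps a running mean and standard deviation of the per-region bytes-per-row samples in a StatsHelper, which is not included in this diff. Below is a hypothetical, minimal equivalent using Welford's online algorithm, only to make the stopping condition concrete; the class name RunningStats and its population-style stddev are assumptions.

/** Hypothetical minimal stand-in for StatsHelper: running mean and stddev of samples. */
class RunningStats {
  private long n_ = 0;
  private double mean_ = 0.0;
  private double m2_ = 0.0;  // sum of squared deviations (Welford's online algorithm)

  void addSample(double x) {
    ++n_;
    double delta = x - mean_;
    mean_ += delta / n_;
    m2_ += delta * (x - mean_);
  }

  long count() { return n_; }
  double mean() { return mean_; }
  double stddev() { return n_ < 2 ? 0.0 : Math.sqrt(m2_ / n_); }
}

// Shape of the stopping condition used below (MIN_NUM_REGIONS_TO_CHECK and
// DELTA_FROM_AVERAGE are constants of the deleted class):
//   while ((stats.count() < MIN_NUM_REGIONS_TO_CHECK
//           || stats.stddev() > stats.mean() * DELTA_FROM_AVERAGE)
//          && stats.count() < regions.size()) { /* sample one more region */ }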
- while ((statsSize.count() < MIN_NUM_REGIONS_TO_CHECK || - statsSize.stddev() > statsSize.mean() * DELTA_FROM_AVERAGE) && - statsSize.count() < locations.size()) { - HRegionLocation currentLocation = locations.get((int) statsSize.count()); - Pair<Long, Long> tmp = getEstimatedRowStatsForRegion(currentLocation, - isCompressed, clusterStatus); - totalEstimatedRows += tmp.first; - statsSize.addSample(tmp.second); - } - - // Sum up the total size for all regions in range. - long totalSize = 0; - for (final HRegionLocation location : locations) { - totalSize += getRegionSize(location, clusterStatus); - } - if (totalSize == 0) { - rowCount = totalEstimatedRows; - } else { - rowCount = (long) (totalSize / statsSize.mean()); - } - rowSize = (long) statsSize.mean(); - } catch (IOException ioe) { - // Print the stack trace, but we'll ignore it - // as this is just an estimate. - // TODO: Put this into the per query log. - LOG.error("Error computing HBase row count estimate", ioe); - return new Pair<Long, Long>(-1l, -1l); - } finally { - if (table != null) closeHBaseTable(table); - } - return new Pair<Long, Long>(rowCount, rowSize); - } - - /** - * Returns the size of the given region in bytes. Simply returns the storefile size - * for this region from the ClusterStatus. Returns 0 in case of an error. - */ - public long getRegionSize(HRegionLocation location, ClusterStatus clusterStatus) { - HRegionInfo info = location.getRegionInfo(); - ServerLoad serverLoad = clusterStatus.getLoad(location.getServerName()); - - // If the serverLoad is null, the master doesn't have information for this region's - // server. This shouldn't normally happen. - if (serverLoad == null) { - LOG.error("Unable to find load for server: " + location.getServerName() + - " for location " + info.getRegionNameAsString()); - return 0; - } - RegionLoad regionLoad = serverLoad.getRegionsLoad().get(info.getRegionName()); - - final long megaByte = 1024L * 1024L; - return regionLoad.getStorefileSizeMB() * megaByte; - } - - /** - * Hive returns the columns in order of their declaration for HBase tables. 
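The range-level extrapolation performed above is simple arithmetic: average the sampled bytes-per-row, then divide the summed storefile size of all regions in range by that average; when nothing has been flushed to storefiles yet, fall back to the row counts seen by the sample scans. A hedged restatement (estimateRangeRowCount is not a method of the deleted class):

class RowCountExtrapolation {
  /**
   * meanBytesPerRow: average row size from the sampled regions (assumed > 0 whenever
   *                  totalStorefileBytes > 0).
   * totalStorefileBytes: sum of getRegionSize(...) over all regions in the key range.
   * sampledRowEstimate: sum of the per-region row estimates gathered while sampling.
   */
  static long estimateRangeRowCount(double meanBytesPerRow, long totalStorefileBytes,
      long sampledRowEstimate) {
    if (totalStorefileBytes == 0) return sampledRowEstimate;  // nothing flushed yet
    return (long) (totalStorefileBytes / meanBytesPerRow);
  }
}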
- */ - @Override - public ArrayList<Column> getColumnsInHiveOrder() { - return getColumns(); - } - - @Override - public TTableDescriptor toThriftDescriptor(Set<Long> referencedPartitions) { - TTableDescriptor tableDescriptor = - new TTableDescriptor(id_.asInt(), TTableType.HBASE_TABLE, - getTColumnDescriptors(), numClusteringCols_, hbaseTableName_, db_.getName()); - tableDescriptor.setHbaseTable(getTHBaseTable()); - return tableDescriptor; - } - - public String getHBaseTableName() { - return hbaseTableName_; - } - - public static Configuration getHBaseConf() { - return hbaseConf_; - } - - public int getNumNodes() { - // TODO: implement - return 100; - } - - @Override - public TCatalogObjectType getCatalogObjectType() { - return TCatalogObjectType.TABLE; - } - - @Override - public TTable toThrift() { - TTable table = super.toThrift(); - table.setTable_type(TTableType.HBASE_TABLE); - table.setHbase_table(getTHBaseTable()); - return table; - } - - private THBaseTable getTHBaseTable() { - THBaseTable tHbaseTable = new THBaseTable(); - tHbaseTable.setTableName(hbaseTableName_); - for (Column c : getColumns()) { - HBaseColumn hbaseCol = (HBaseColumn) c; - tHbaseTable.addToFamilies(hbaseCol.getColumnFamily()); - if (hbaseCol.getColumnQualifier() != null) { - tHbaseTable.addToQualifiers(hbaseCol.getColumnQualifier()); - } else { - tHbaseTable.addToQualifiers(""); - } - tHbaseTable.addToBinary_encoded(hbaseCol.isBinaryEncoded()); - } - return tHbaseTable; - } - - /** - * This is copied from org.apache.hadoop.hbase.client.HTable. The only difference is - * that it does not use cache when calling getRegionLocation. - * TODO: Remove this function and use HTable.getRegionsInRange when the non-cache - * version has been ported to CDH (DISTRO-477). - * Get the corresponding regions for an arbitrary range of keys. - * <p> - * - * @param startRow - * Starting row in range, inclusive - * @param endRow - * Ending row in range, exclusive - * @return A list of HRegionLocations corresponding to the regions that - * contain the specified range - * @throws IOException - * if a remote or network exception occurs - */ - public static List<HRegionLocation> getRegionsInRange( - org.apache.hadoop.hbase.client.Table hbaseTbl, - final byte[] startKey, final byte[] endKey) throws IOException { - final boolean endKeyIsEndOfTable = Bytes.equals(endKey, HConstants.EMPTY_END_ROW); - if ((Bytes.compareTo(startKey, endKey) > 0) && !endKeyIsEndOfTable) { - throw new IllegalArgumentException("Invalid range: " + - Bytes.toStringBinary(startKey) + " > " + Bytes.toStringBinary(endKey)); - } - final List<HRegionLocation> regionList = new ArrayList<HRegionLocation>(); - byte[] currentKey = startKey; - Connection connection = ConnectionHolder.getConnection(hbaseConf_); - // Make sure only one thread is accessing the hbaseTbl. - synchronized (hbaseTbl) { - RegionLocator locator = connection.getRegionLocator(hbaseTbl.getName()); - do { - // always reload region location info. - HRegionLocation regionLocation = locator.getRegionLocation(currentKey, true); - regionList.add(regionLocation); - currentKey = regionLocation.getRegionInfo().getEndKey(); - } while (!Bytes.equals(currentKey, HConstants.EMPTY_END_ROW) && - (endKeyIsEndOfTable || Bytes.compareTo(currentKey, endKey) < 0)); - } - return regionList; - } - - /** - * Returns the storage handler class for HBase tables read by Hive. 
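getRegionsInRange() above walks region boundaries one uncached lookup at a time: locate the region containing the current key, record it, and continue from that region's end key until the end of the table or the requested end key is passed. The sketch below isolates that walk behind a hypothetical RegionBoundaryLookup interface so it needs no live HBase connection; only the loop structure mirrors the deleted method.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

class RegionRangeWalk {
  /** Hypothetical lookup: returns {startKey, endKey} of the region containing 'key'. */
  interface RegionBoundaryLookup {
    byte[][] locate(byte[] key);
  }

  static final byte[] EMPTY_END_ROW = new byte[0];  // mirrors HConstants.EMPTY_END_ROW

  static List<byte[][]> regionsInRange(RegionBoundaryLookup locator, byte[] startKey,
      byte[] endKey) {
    boolean endIsEndOfTable = Arrays.equals(endKey, EMPTY_END_ROW);
    if (!endIsEndOfTable && compareUnsigned(startKey, endKey) > 0) {
      throw new IllegalArgumentException("Invalid range: start key sorts after end key");
    }
    List<byte[][]> regions = new ArrayList<>();
    byte[] current = startKey;
    do {
      byte[][] region = locator.locate(current);  // always a fresh, uncached lookup
      regions.add(region);
      current = region[1];                        // continue from this region's end key
    } while (!Arrays.equals(current, EMPTY_END_ROW)
        && (endIsEndOfTable || compareUnsigned(current, endKey) < 0));
    return regions;
  }

  // Byte-wise unsigned comparison, matching the ordering Bytes.compareTo uses.
  static int compareUnsigned(byte[] a, byte[] b) {
    int n = Math.min(a.length, b.length);
    for (int i = 0; i < n; ++i) {
      int cmp = (a[i] & 0xff) - (b[i] & 0xff);
      if (cmp != 0) return cmp;
    }
    return a.length - b.length;
  }
}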
- */ - @Override - public String getStorageHandlerClassName() { - return HBASE_STORAGE_HANDLER; - } - - /** - * Returns statistics on this table as a tabular result set. Used for the - * SHOW TABLE STATS statement. The schema of the returned TResultSet is set - * inside this method. - */ - public TResultSet getTableStats() { - TResultSet result = new TResultSet(); - TResultSetMetadata resultSchema = new TResultSetMetadata(); - result.setSchema(resultSchema); - resultSchema.addToColumns( - new TColumn("Region Location", Type.STRING.toThrift())); - resultSchema.addToColumns(new TColumn("Start RowKey", - Type.STRING.toThrift())); - resultSchema.addToColumns(new TColumn("Est. #Rows", Type.BIGINT.toThrift())); - resultSchema.addToColumns(new TColumn("Size", Type.STRING.toThrift())); - - org.apache.hadoop.hbase.client.Table table; - try { - table = getHBaseTable(); - } catch (IOException e) { - LOG.error("Error getting HBase table " + hbaseTableName_, e); - throw new RuntimeException(e); - } - - // TODO: Consider fancier stats maintenance techniques for speeding up this process. - // Currently, we list all regions and perform a mini-scan of each of them to - // estimate the number of rows, the data size, etc., which is rather expensive. - try { - ClusterStatus clusterStatus = getClusterStatus(); - long totalNumRows = 0; - long totalSize = 0; - List<HRegionLocation> regions = HBaseTable.getRegionsInRange(table, - HConstants.EMPTY_END_ROW, HConstants.EMPTY_START_ROW); - for (HRegionLocation region : regions) { - TResultRowBuilder rowBuilder = new TResultRowBuilder(); - HRegionInfo regionInfo = region.getRegionInfo(); - Pair<Long, Long> estRowStats = - getEstimatedRowStatsForRegion(region, false, clusterStatus); - - long numRows = estRowStats.first.longValue(); - long regionSize = getRegionSize(region, clusterStatus); - totalNumRows += numRows; - totalSize += regionSize; - - // Add the region location, start rowkey, number of rows and raw size. - rowBuilder.add(String.valueOf(region.getHostname())) - .add(Bytes.toString(regionInfo.getStartKey())).add(numRows) - .addBytes(regionSize); - result.addToRows(rowBuilder.get()); - } - - // Total num rows and raw region size. - if (regions.size() > 1) { - TResultRowBuilder rowBuilder = new TResultRowBuilder(); - rowBuilder.add("Total").add("").add(totalNumRows).addBytes(totalSize); - result.addToRows(rowBuilder.get()); - } - } catch (IOException e) { - throw new RuntimeException(e); - } finally { - closeHBaseTable(table); - } - return result; - } - - /** - * Returns true if the given Metastore Table represents an HBase table. - * Versions of Hive/HBase are inconsistent which HBase related fields are set - * (e.g., HIVE-6548 changed the input format to null). - * For maximum compatibility consider all known fields that indicate an HBase table. 
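The SHOW TABLE STATS output assembled by getTableStats() above is one row per region (region location, start row key, estimated rows, size) plus a trailing 'Total' row when the table spans more than one region. A minimal, purely illustrative aggregation over a hypothetical RegionStats value class, using plain strings instead of TResultSet:

import java.util.ArrayList;
import java.util.List;

class TableStatsSummary {
  /** Hypothetical per-region sample: host, start row key, estimated rows, size in bytes. */
  static class RegionStats {
    final String host; final String startKey; final long estRows; final long sizeBytes;
    RegionStats(String host, String startKey, long estRows, long sizeBytes) {
      this.host = host; this.startKey = startKey;
      this.estRows = estRows; this.sizeBytes = sizeBytes;
    }
  }

  static List<String> render(List<RegionStats> regions) {
    List<String> rows = new ArrayList<>();
    long totalRows = 0;
    long totalBytes = 0;
    for (RegionStats r : regions) {
      rows.add(r.host + "\t" + r.startKey + "\t" + r.estRows + "\t" + r.sizeBytes);
      totalRows += r.estRows;
      totalBytes += r.sizeBytes;
    }
    if (regions.size() > 1) {
      rows.add("Total\t\t" + totalRows + "\t" + totalBytes);  // only for multi-region tables
    }
    return rows;
  }
}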
- */ - public static boolean isHBaseTable( - org.apache.hadoop.hive.metastore.api.Table msTbl) { - if (msTbl.getParameters() != null && - msTbl.getParameters().containsKey(HBASE_STORAGE_HANDLER)) { - return true; - } - StorageDescriptor sd = msTbl.getSd(); - if (sd == null) return false; - if (sd.getInputFormat() != null && sd.getInputFormat().equals(HBASE_INPUT_FORMAT)) { - return true; - } else if (sd.getSerdeInfo() != null && - sd.getSerdeInfo().getSerializationLib() != null && - sd.getSerdeInfo().getSerializationLib().equals(HBASE_SERIALIZATION_LIB)) { - return true; - } - return false; - } -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/HdfsCachePool.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HdfsCachePool.java b/fe/src/main/java/com/cloudera/impala/catalog/HdfsCachePool.java deleted file mode 100644 index b8ff102..0000000 --- a/fe/src/main/java/com/cloudera/impala/catalog/HdfsCachePool.java +++ /dev/null @@ -1,65 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.catalog; - -import org.apache.hadoop.hdfs.protocol.CachePoolInfo; - -import com.cloudera.impala.thrift.TCatalogObjectType; -import com.cloudera.impala.thrift.THdfsCachePool; -import com.google.common.base.Preconditions; - -/** - * Represents an HDFS cache pool (CachePoolInfo class). Currently, the only metadata we - * care about for cache pools is the cache pool name. In the future it may be desirable - * to track additional metadata such as the owner, size, and current usage of the pool. 
- */ -public class HdfsCachePool implements CatalogObject { - private long catalogVersion_; - private final THdfsCachePool cachePool_; - - public HdfsCachePool(CachePoolInfo cachePoolInfo) { - cachePool_ = new THdfsCachePool(cachePoolInfo.getPoolName()); - } - - public HdfsCachePool(THdfsCachePool cachePool) { - Preconditions.checkNotNull(cachePool); - cachePool_ = cachePool; - } - - @Override - public TCatalogObjectType getCatalogObjectType() { - return TCatalogObjectType.HDFS_CACHE_POOL; - } - - public THdfsCachePool toThrift() { - return cachePool_; - } - - public static HdfsCachePool fromThrift(THdfsCachePool cachePool) { - return new HdfsCachePool(cachePool); - } - - @Override - public String getName() { return cachePool_.getPool_name(); } - @Override - public long getCatalogVersion() { return catalogVersion_; } - @Override - public void setCatalogVersion(long newVersion) { catalogVersion_ = newVersion; } - @Override - public boolean isLoaded() { return true; } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/HdfsCompression.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HdfsCompression.java b/fe/src/main/java/com/cloudera/impala/catalog/HdfsCompression.java deleted file mode 100644 index 302ec99..0000000 --- a/fe/src/main/java/com/cloudera/impala/catalog/HdfsCompression.java +++ /dev/null @@ -1,85 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.catalog; - -import com.cloudera.impala.thrift.THdfsCompression; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableMap; - -/** - * Support for recognizing compression suffixes on data files. - * Compression of a file is recognized in mapreduce by looking for suffixes of - * supported codecs. - * For now Impala supports LZO, GZIP, SNAPPY, and BZIP2. LZO can use the specific HIVE - * input class. - */ -// TODO: Add LZ4? -public enum HdfsCompression { - NONE, - DEFLATE, - GZIP, - BZIP2, - SNAPPY, - LZO, - LZO_INDEX; //Lzo index file. - - /* Map from a suffix to a compression type */ - private static final ImmutableMap<String, HdfsCompression> SUFFIX_MAP = - ImmutableMap.<String, HdfsCompression>builder(). - put("deflate", DEFLATE). - put("gz", GZIP). - put("bz2", BZIP2). - put("snappy", SNAPPY). - put("lzo", LZO). - put("index", LZO_INDEX). - build(); - - /* Given a file name return its compression type, if any. 
*/ - public static HdfsCompression fromFileName(String fileName) { - int index = fileName.lastIndexOf("."); - if (index == -1) { - return NONE; - } - - String suffix = fileName.substring(index + 1); - HdfsCompression compression = SUFFIX_MAP.get(suffix.toLowerCase()); - return compression == null ? NONE : compression; - } - - public THdfsCompression toThrift() { - switch (this) { - case NONE: return THdfsCompression.NONE; - case DEFLATE: return THdfsCompression.DEFLATE; - case GZIP: return THdfsCompression.GZIP; - case BZIP2: return THdfsCompression.BZIP2; - case SNAPPY: return THdfsCompression.SNAPPY_BLOCKED; - case LZO: return THdfsCompression.LZO; - default: throw new IllegalStateException("Unexpected codec: " + this); - } - } - - /* Returns a compression type based on (Hive's) input format. Special case for LZO. */ - public static HdfsCompression fromHdfsInputFormatClass(String inputFormatClass) { - // TODO: Remove when we have the native LZO writer. - Preconditions.checkNotNull(inputFormatClass); - if (inputFormatClass.equals(HdfsFileFormat.LZO_TEXT.inputFormat())) { - return LZO; - } - return NONE; - } -}
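As a usage-level illustration of the suffix mapping above: the codec is inferred purely from the last dot-separated extension, the lookup is case-insensitive, and a missing or unrecognized extension means no compression. The expected results in the comments below are derived from fromFileName()'s logic; the example class itself is hypothetical and runs against the (now-deleted) enum.

import com.cloudera.impala.catalog.HdfsCompression;

public class HdfsCompressionExample {
  public static void main(String[] args) {
    System.out.println(HdfsCompression.fromFileName("part-00000.gz"));   // GZIP
    System.out.println(HdfsCompression.fromFileName("web_logs.LZO"));    // LZO (suffix is lower-cased)
    System.out.println(HdfsCompression.fromFileName("data.lzo.index"));  // LZO_INDEX
    System.out.println(HdfsCompression.fromFileName("README"));          // NONE: no '.' in the name
    System.out.println(HdfsCompression.fromFileName("archive.zip"));     // NONE: unknown suffix
  }
}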
