Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java (original) +++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java Fri Sep 5 19:15:44 2014 @@ -57,6 +57,7 @@ import org.apache.hadoop.hive.metastore. import org.apache.hadoop.hive.metastore.model.MRoleMap; import org.apache.hadoop.hive.metastore.model.MTableColumnPrivilege; import org.apache.hadoop.hive.metastore.model.MTablePrivilege; +import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.thrift.TException; public interface RawStore extends Configurable { @@ -130,6 +131,9 @@ public interface RawStore extends Config public abstract boolean addPartitions(String dbName, String tblName, List<Partition> parts) throws InvalidObjectException, MetaException; + public abstract boolean addPartitions(String dbName, String tblName, PartitionSpecProxy partitionSpec, boolean ifNotExists) + throws InvalidObjectException, MetaException; + public abstract Partition getPartition(String dbName, String tableName, List<String> part_vals) throws MetaException, NoSuchObjectException;
Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java (original) +++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java Fri Sep 5 19:15:44 2014 @@ -507,8 +507,18 @@ public class Warehouse { */ public FileStatus[] getFileStatusesForSD(StorageDescriptor desc) throws MetaException { + return getFileStatusesForLocation(desc.getLocation()); + } + + /** + * @param location + * @return array of FileStatus objects corresponding to the files + * making up the passed storage description + */ + public FileStatus[] getFileStatusesForLocation(String location) + throws MetaException { try { - Path path = new Path(desc.getLocation()); + Path path = new Path(location); FileSystem fileSys = path.getFileSystem(conf); return HiveStatsUtils.getFileStatusRecurse(path, -1, fileSys); } catch (IOException ioe) { Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/events/AddPartitionEvent.java URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/events/AddPartitionEvent.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/events/AddPartitionEvent.java (original) +++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/events/AddPartitionEvent.java Fri Sep 5 19:15:44 2014 @@ -21,19 +21,23 @@ package org.apache.hadoop.hive.metastore import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import java.util.Arrays; +import java.util.Iterator; import java.util.List; public class AddPartitionEvent extends ListenerEvent { private final Table table; private final List<Partition> partitions; + private PartitionSpecProxy partitionSpecProxy; public AddPartitionEvent(Table table, List<Partition> partitions, boolean status, HMSHandler handler) { super(status, handler); this.table = table; this.partitions = partitions; + this.partitionSpecProxy = null; } public AddPartitionEvent(Table table, Partition partition, boolean status, HMSHandler handler) { @@ -41,6 +45,16 @@ public class AddPartitionEvent extends L } /** + * Alternative constructor to use PartitionSpec APIs. + */ + public AddPartitionEvent(Table table, PartitionSpecProxy partitionSpec, boolean status, HMSHandler handler) { + super(status, handler); + this.table = table; + this.partitions = null; + this.partitionSpecProxy = partitionSpec; + } + + /** * @return The table. */ public Table getTable() { @@ -54,4 +68,11 @@ public class AddPartitionEvent extends L return partitions; } + /** + * @return Iterator for partitions. + */ + public Iterator<Partition> getPartitionIterator() { + return partitionSpecProxy == null ? null : partitionSpecProxy.getPartitionIterator(); + } + } Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/events/PreAddPartitionEvent.java URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/events/PreAddPartitionEvent.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/events/PreAddPartitionEvent.java (original) +++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/events/PreAddPartitionEvent.java Fri Sep 5 19:15:44 2014 @@ -21,19 +21,23 @@ package org.apache.hadoop.hive.metastore import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import java.util.Arrays; +import java.util.Iterator; import java.util.List; public class PreAddPartitionEvent extends PreEventContext { private final Table table; private final List<Partition> partitions; + private PartitionSpecProxy partitionSpecProxy; public PreAddPartitionEvent (Table table, List<Partition> partitions, HMSHandler handler) { super(PreEventType.ADD_PARTITION, handler); this.table = table; this.partitions = partitions; + this.partitionSpecProxy = null; } public PreAddPartitionEvent(Table table, Partition partition, HMSHandler handler) { @@ -41,6 +45,14 @@ public class PreAddPartitionEvent extend } /** + * Alternative constructor, using + */ + public PreAddPartitionEvent(Table table, PartitionSpecProxy partitionSpecProxy, HMSHandler handler) { + this(table, (List<Partition>)null, handler); + this.partitionSpecProxy = partitionSpecProxy; + } + + /** * @return the partitions */ public List<Partition> getPartitions() { @@ -53,4 +65,11 @@ public class PreAddPartitionEvent extend public Table getTable() { return table ; } + + /** + * @return Iterator over partition-list. + */ + public Iterator<Partition> getPartitionIterator() { + return partitionSpecProxy == null ? null : partitionSpecProxy.getPartitionIterator(); + } } Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionInfo.java URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionInfo.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionInfo.java (original) +++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionInfo.java Fri Sep 5 19:15:44 2014 @@ -34,9 +34,17 @@ public class CompactionInfo { private String fullPartitionName = null; private String fullTableName = null; + public CompactionInfo(String dbname, String tableName, String partName, CompactionType type) { + this.dbname = dbname; + this.tableName = tableName; + this.partName = partName; + this.type = type; + } + CompactionInfo() {} + public String getFullPartitionName() { if (fullPartitionName == null) { - StringBuffer buf = new StringBuffer(dbname); + StringBuilder buf = new StringBuilder(dbname); buf.append('.'); buf.append(tableName); if (partName != null) { @@ -50,11 +58,14 @@ public class CompactionInfo { public String getFullTableName() { if (fullTableName == null) { - StringBuffer buf = new StringBuffer(dbname); + StringBuilder buf = new StringBuilder(dbname); buf.append('.'); buf.append(tableName); fullTableName = buf.toString(); } return fullTableName; } + public boolean isMajorCompaction() { + return CompactionType.MAJOR == type; + } } Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java (original) +++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java Fri Sep 5 19:15:44 2014 @@ -535,6 +535,46 @@ public class CompactionTxnHandler extend deadlockCnt = 0; } } + + /** + * Queries metastore DB directly to find columns in the table which have statistics information. + * If {@code ci} includes partition info then per partition stats info is examined, otherwise + * table level stats are examined. + * @throws MetaException + */ + public List<String> findColumnsWithStats(CompactionInfo ci) throws MetaException { + Connection dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED); + Statement stmt = null; + ResultSet rs = null; + try { + stmt = dbConn.createStatement(); + String s = "SELECT COLUMN_NAME FROM " + (ci.partName == null ? "TAB_COL_STATS" : "PART_COL_STATS") + + " WHERE DB_NAME='" + ci.dbname + "' AND TABLE_NAME='" + ci.tableName + "'" + + (ci.partName == null ? "" : " AND PARTITION_NAME='" + ci.partName + "'"); + LOG.debug("Going to execute <" + s + ">"); + rs = stmt.executeQuery(s); + List<String> columns = new ArrayList<String>(); + while(rs.next()) { + columns.add(rs.getString(1)); + } + LOG.debug("Found columns to update stats: " + columns + " on " + ci.tableName + + (ci.partName == null ? "" : "/" + ci.partName)); + dbConn.commit(); + return columns; + } catch (SQLException e) { + try { + LOG.error("Failed to find columns to analyze stats on for " + ci.tableName + + (ci.partName == null ? "" : "/" + ci.partName), e); + dbConn.rollback(); + } catch (SQLException e1) { + //nothing we can do here + } + throw new MetaException("Unable to connect to transaction database " + + StringUtils.stringifyException(e)); + } finally { + close(rs, stmt, dbConn); + } + } } Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java (original) +++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java Fri Sep 5 19:15:44 2014 @@ -860,6 +860,29 @@ public class TxnHandler { } /** + * Close the ResultSet. + * @param rs may be {@code null} + */ + void close(ResultSet rs) { + try { + if (rs != null && !rs.isClosed()) { + rs.close(); + } + } + catch(SQLException ex) { + LOG.warn("Failed to close statement " + ex.getMessage()); + } + } + + /** + * Close all 3 JDBC artifacts in order: {@code rs stmt dbConn} + */ + void close(ResultSet rs, Statement stmt, Connection dbConn) { + close(rs); + closeStmt(stmt); + closeDbConn(dbConn); + } + /** * Determine if an exception was a deadlock. Unfortunately there is no standard way to do * this, so we have to inspect the error messages and catch the telltale signs for each * different database. Modified: hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java (original) +++ hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java Fri Sep 5 19:15:44 2014 @@ -56,6 +56,7 @@ import org.apache.hadoop.hive.metastore. import org.apache.hadoop.hive.metastore.model.MRoleMap; import org.apache.hadoop.hive.metastore.model.MTableColumnPrivilege; import org.apache.hadoop.hive.metastore.model.MTablePrivilege; +import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.thrift.TException; /** @@ -677,6 +678,11 @@ public class DummyRawStoreControlledComm } @Override + public boolean addPartitions(String dbName, String tblName, PartitionSpecProxy partitionSpec, boolean ifNotExists) throws InvalidObjectException, MetaException { + return false; + } + + @Override public void dropPartitions(String dbName, String tblName, List<String> partNames) throws MetaException, NoSuchObjectException { objectStore.dropPartitions(dbName, tblName, partNames); Modified: hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java (original) +++ hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java Fri Sep 5 19:15:44 2014 @@ -57,6 +57,7 @@ import org.apache.hadoop.hive.metastore. import org.apache.hadoop.hive.metastore.model.MRoleMap; import org.apache.hadoop.hive.metastore.model.MTableColumnPrivilege; import org.apache.hadoop.hive.metastore.model.MTablePrivilege; +import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.thrift.TException; /** @@ -699,6 +700,11 @@ public class DummyRawStoreForJdoConnecti } @Override + public boolean addPartitions(String dbName, String tblName, PartitionSpecProxy partitionSpec, boolean ifNotExists) throws InvalidObjectException, MetaException { + return false; + } + + @Override public void dropPartitions(String dbName, String tblName, List<String> partNames) { } Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt (original) +++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt Fri Sep 5 19:15:44 2014 @@ -163,8 +163,8 @@ public class <ClassName> extends VectorE VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(2) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("<OperandType1>"), - VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>")) + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType1>"), + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt (original) +++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt Fri Sep 5 19:15:44 2014 @@ -155,8 +155,8 @@ public class <ClassName> extends VectorE VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(2) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("<OperandType1>"), - VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>")) + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType1>"), + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt (original) +++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt Fri Sep 5 19:15:44 2014 @@ -128,7 +128,7 @@ public class <ClassName> extends VectorE VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(1) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("<OperandType>")) + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType>")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); } Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt (original) +++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt Fri Sep 5 19:15:44 2014 @@ -188,8 +188,8 @@ public class <ClassName> extends VectorE VectorExpressionDescriptor.Mode.FILTER) .setNumArguments(2) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("<OperandType1>"), - VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>")) + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType1>"), + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt (original) +++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt Fri Sep 5 19:15:44 2014 @@ -164,8 +164,8 @@ public class <ClassName> extends VectorE VectorExpressionDescriptor.Mode.FILTER) .setNumArguments(2) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("<OperandType1>"), - VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>")) + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType1>"), + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt (original) +++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt Fri Sep 5 19:15:44 2014 @@ -164,8 +164,8 @@ public class <ClassName> extends VectorE VectorExpressionDescriptor.Mode.FILTER) .setNumArguments(2) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("<OperandType1>"), - VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>")) + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType1>"), + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.SCALAR, VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt (original) +++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt Fri Sep 5 19:15:44 2014 @@ -184,9 +184,9 @@ public class <ClassName> extends VectorE VectorExpressionDescriptor.Mode.FILTER) .setNumArguments(3) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("string"), - VectorExpressionDescriptor.ArgumentType.getType("string"), - VectorExpressionDescriptor.ArgumentType.getType("string")) + VectorExpressionDescriptor.ArgumentType.STRING, + VectorExpressionDescriptor.ArgumentType.STRING, + VectorExpressionDescriptor.ArgumentType.STRING) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR, Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt (original) +++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt Fri Sep 5 19:15:44 2014 @@ -176,8 +176,8 @@ public class <ClassName> extends VectorE .setNumArguments(3) .setArgumentTypes( VectorExpressionDescriptor.ArgumentType.getType("long"), - VectorExpressionDescriptor.ArgumentType.getType("<OperandType>"), - VectorExpressionDescriptor.ArgumentType.getType("<OperandType>")) + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType>"), + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType>")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.COLUMN, Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt (original) +++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt Fri Sep 5 19:15:44 2014 @@ -167,8 +167,8 @@ public class <ClassName> extends VectorE .setNumArguments(3) .setArgumentTypes( VectorExpressionDescriptor.ArgumentType.getType("long"), - VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"), - VectorExpressionDescriptor.ArgumentType.getType("<OperandType3>")) + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>"), + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType3>")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.COLUMN, Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt (original) +++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt Fri Sep 5 19:15:44 2014 @@ -169,8 +169,8 @@ public class <ClassName> extends VectorE .setNumArguments(3) .setArgumentTypes( VectorExpressionDescriptor.ArgumentType.getType("long"), - VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"), - VectorExpressionDescriptor.ArgumentType.getType("<OperandType3>")) + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>"), + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType3>")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR, Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt (original) +++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt Fri Sep 5 19:15:44 2014 @@ -154,8 +154,8 @@ public class <ClassName> extends VectorE .setNumArguments(3) .setArgumentTypes( VectorExpressionDescriptor.ArgumentType.getType("long"), - VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"), - VectorExpressionDescriptor.ArgumentType.getType("<OperandType3>")) + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>"), + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType3>")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR, Modified: hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt (original) +++ hive/branches/tez/ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt Fri Sep 5 19:15:44 2014 @@ -155,8 +155,8 @@ public class <ClassName> extends VectorE VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(2) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("<OperandType1>"), - VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>")) + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType1>"), + VectorExpressionDescriptor.ArgumentType.getType("<VectorExprArgType2>")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.SCALAR, VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/Driver.java URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original) +++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Fri Sep 5 19:15:44 2014 @@ -135,7 +135,6 @@ public class Driver implements CommandPr private String errorMessage; private String SQLState; private Throwable downstreamError; - private HiveTxnManager txnMgr; // A limit on the number of threads that can be launched private int maxthreads; @@ -145,16 +144,6 @@ public class Driver implements CommandPr private String userName; - private void createTxnManager() throws SemanticException { - if (txnMgr == null) { - try { - txnMgr = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf); - } catch (LockException e) { - throw new SemanticException(e.getMessage(), e); - } - } - } - private boolean checkConcurrency() throws SemanticException { boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY); if (!supportConcurrency) { @@ -868,7 +857,7 @@ public class Driver implements CommandPr // the input format. private int recordValidTxns() { try { - ValidTxnList txns = txnMgr.getValidTxns(); + ValidTxnList txns = SessionState.get().getTxnMgr().getValidTxns(); conf.set(ValidTxnList.VALID_TXNS_KEY, txns.toString()); return 0; } catch (LockException e) { @@ -893,7 +882,7 @@ public class Driver implements CommandPr try { - txnMgr.acquireLocks(plan, ctx, userName); + SessionState.get().getTxnMgr().acquireLocks(plan, ctx, userName); return 0; } catch (LockException e) { errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage(); @@ -917,7 +906,7 @@ public class Driver implements CommandPr perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RELEASE_LOCKS); if (hiveLocks != null) { - ctx.getHiveTxnManager().getLockManager().releaseLocks(hiveLocks); + SessionState.get().getTxnMgr().getLockManager().releaseLocks(hiveLocks); } ctx.setHiveLocks(null); @@ -1048,9 +1037,14 @@ public class Driver implements CommandPr boolean requireLock = false; boolean ckLock = false; + SessionState ss = SessionState.get(); try { ckLock = checkConcurrency(); - createTxnManager(); + try { + ss.initTxnMgr(conf); + } catch (LockException e) { + throw new SemanticException(e.getMessage(), e); + } } catch (SemanticException e) { errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage(); SQLState = ErrorMsg.findSQLState(e.getMessage()); @@ -1074,7 +1068,7 @@ public class Driver implements CommandPr // the reason that we set the txn manager for the cxt here is because each // query has its own ctx object. The txn mgr is shared across the // same instance of Driver, which can run multiple queries. - ctx.setHiveTxnManager(txnMgr); + ctx.setHiveTxnManager(ss.getTxnMgr()); if (ckLock) { boolean lockOnlyMapred = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_LOCK_MAPRED_ONLY); @@ -1670,9 +1664,6 @@ public class Driver implements CommandPr e.getMessage()); } } - if (txnMgr != null) { - txnMgr.closeTxnManager(); - } } public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan() throws IOException { Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java (original) +++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java Fri Sep 5 19:15:44 2014 @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hive.common.util.AnnotationUtils; @@ -28,15 +30,46 @@ import org.apache.hive.common.util.Annot */ public class VectorExpressionDescriptor { + private static final Log LOG = LogFactory.getLog( + VectorExpressionDescriptor.class.getName()); + final static int MAX_NUM_ARGUMENTS = 3; + // + // Special handling is needed at times for DATE, TIMESTAMP, (STRING), CHAR, and VARCHAR so they can + // be named specifically as argument types. + // + // LongColumnVector --> + // INT_FAMILY + // DATE + // TIMESTAMP + // + // DoubleColumnVector --> + // FLOAT_FAMILY + // + // DecimalColumnVector --> + // DECIMAL + // + // BytesColumnVector --> + // STRING + // CHAR + // VARCHAR + // public enum ArgumentType { - NONE(0), - LONG(1), - DOUBLE(2), - STRING(3), - DECIMAL(4), - ANY(7); + NONE (0x000), + INT_FAMILY (0x001), + FLOAT_FAMILY (0x002), + DECIMAL (0x004), + STRING (0x008), + CHAR (0x010), + VARCHAR (0x020), + STRING_FAMILY (STRING.value | CHAR.value | VARCHAR.value), + DATE (0x040), + TIMESTAMP (0x080), + DATETIME_FAMILY (DATE.value | TIMESTAMP.value), + INT_DATETIME_FAMILY (INT_FAMILY.value | DATETIME_FAMILY.value), + STRING_DATETIME_FAMILY (STRING_FAMILY.value | DATETIME_FAMILY.value), + ALL_FAMILY (0xFFF); private final int value; @@ -48,12 +81,79 @@ public class VectorExpressionDescriptor return value; } + public static ArgumentType fromHiveTypeName(String hiveTypeName) { + String lower = hiveTypeName.toLowerCase(); + if (lower.equals("tinyint") || + lower.equals("smallint") || + lower.equals("int") || + lower.equals("bigint") || + lower.equals("boolean") || + lower.equals("long")) { + return INT_FAMILY; + } else if (lower.equals("double") || lower.equals("float")) { + return FLOAT_FAMILY; + } else if (lower.equals("string")) { + return STRING; + } else if (VectorizationContext.charTypePattern.matcher(lower).matches()) { + return CHAR; + } else if (VectorizationContext.varcharTypePattern.matcher(lower).matches()) { + return VARCHAR; + } else if (VectorizationContext.decimalTypePattern.matcher(lower).matches()) { + return DECIMAL; + } else if (lower.equals("timestamp")) { + return TIMESTAMP; + } else if (lower.equals("date")) { + return DATE; + } else if (lower.equals("void")) { + // The old code let void through... + return INT_FAMILY; + } else { + return NONE; + } + } + public static ArgumentType getType(String inType) { - String type = VectorizationContext.getNormalizedTypeName(inType); - if (VectorizationContext.decimalTypePattern.matcher(type).matches()) { - type = "decimal"; + if (inType.equalsIgnoreCase("long")) { + // A synonym in some places in the code... + return INT_FAMILY; + } else if (inType.equalsIgnoreCase("double")) { + // A synonym in some places in the code... + return FLOAT_FAMILY; + } else if (VectorizationContext.decimalTypePattern.matcher(inType).matches()) { + return DECIMAL; + } else if (VectorizationContext.charTypePattern.matcher(inType).matches()) { + return CHAR; + } else if (VectorizationContext.varcharTypePattern.matcher(inType).matches()) { + return VARCHAR; + } + return valueOf(inType.toUpperCase()); + } + + public boolean isSameTypeOrFamily(ArgumentType other) { + return ((value & other.value) != 0); + } + + public static String getVectorColumnSimpleName(ArgumentType argType) { + if (argType == INT_FAMILY || + argType == DATE || + argType == TIMESTAMP) { + return "Long"; + } else if (argType == FLOAT_FAMILY) { + return "Double"; + } else if (argType == DECIMAL) { + return "Decimal"; + } else if (argType == STRING || + argType == CHAR || + argType == VARCHAR) { + return "String"; + } else { + return "None"; } - return valueOf(type.toUpperCase()); + } + + public static String getVectorColumnSimpleName(String hiveTypeName) { + ArgumentType argType = fromHiveTypeName(hiveTypeName); + return getVectorColumnSimpleName(argType); } } @@ -162,15 +262,12 @@ public class VectorExpressionDescriptor */ public static final class Descriptor { - @Override - public boolean equals(Object o) { - Descriptor other = (Descriptor) o; + public boolean matches(Descriptor other) { if (!mode.equals(other.mode) || (argCount != other.argCount) ) { return false; } for (int i = 0; i < argCount; i++) { - if (!argTypes[i].equals(other.argTypes[i]) && (!argTypes[i].equals(ArgumentType.ANY) && - !other.argTypes[i].equals(ArgumentType.ANY))) { + if (!argTypes[i].isSameTypeOrFamily(other.argTypes[i])) { return false; } if (!exprTypes[i].equals(other.exprTypes[i])) { @@ -228,13 +325,23 @@ public class VectorExpressionDescriptor Class<? extends VectorExpression>[] list = annotation.value(); for (Class<? extends VectorExpression> ve : list) { try { - if (ve.newInstance().getDescriptor().equals(descriptor)) { + if (ve.newInstance().getDescriptor().matches(descriptor)) { return ve; } } catch (Exception ex) { throw new HiveException(ex); } } + if (LOG.isDebugEnabled()) { + LOG.debug("getVectorExpressionClass udf " + udf.getSimpleName() + " descriptor: " + descriptor.toString()); + for (Class<? extends VectorExpression> ve : list) { + try { + LOG.debug("getVectorExpressionClass doesn't match " + ve.getSimpleName() + " " + ve.newInstance().getDescriptor().toString()); + } catch (Exception ex) { + throw new HiveException(ex); + } + } + } return null; } } Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original) +++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Fri Sep 5 19:15:44 2014 @@ -33,7 +33,9 @@ import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.type.Decimal128; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; import org.apache.hadoop.hive.ql.exec.FunctionInfo; @@ -97,11 +99,13 @@ import org.apache.hadoop.hive.ql.udf.gen import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.util.StringUtils; /** * Context class for vectorization execution. @@ -123,6 +127,15 @@ public class VectorizationContext { public static final Pattern decimalTypePattern = Pattern.compile("decimal.*", Pattern.CASE_INSENSITIVE); + public static final Pattern charTypePattern = Pattern.compile("char.*", + Pattern.CASE_INSENSITIVE); + + public static final Pattern varcharTypePattern = Pattern.compile("varchar.*", + Pattern.CASE_INSENSITIVE); + + public static final Pattern charVarcharTypePattern = Pattern.compile("char.*|varchar.*", + Pattern.CASE_INSENSITIVE); + //Map column number to type private final OutputColumnManager ocm; @@ -210,14 +223,17 @@ public class VectorizationContext { private final Set<Integer> usedOutputColumns = new HashSet<Integer>(); - int allocateOutputColumn(String columnType) { - if (initialOutputCol < 0) { - // This is a test - return 0; + int allocateOutputColumn(String hiveTypeName) { + if (initialOutputCol < 0) { + // This is a test + return 0; + } + + // We need to differentiate DECIMAL columns by their precision and scale... + String normalizedTypeName = getNormalizedName(hiveTypeName); + int relativeCol = allocateOutputColumnInternal(normalizedTypeName); + return initialOutputCol + relativeCol; } - int relativeCol = allocateOutputColumnInternal(columnType); - return initialOutputCol + relativeCol; - } private int allocateOutputColumnInternal(String columnType) { for (int i = 0; i < outputColCount; i++) { @@ -548,6 +564,12 @@ public class VectorizationContext { case STRING: udfClass = new UDFToString(); break; + case CHAR: + genericUdf = new GenericUDFToChar(); + break; + case VARCHAR: + genericUdf = new GenericUDFToVarchar(); + break; case BOOLEAN: udfClass = new UDFToBoolean(); break; @@ -592,15 +614,15 @@ public class VectorizationContext { Class<? extends UDF> udfClass = bridge.getUdfClass(); if (udfClass.equals(UDFHex.class) || udfClass.equals(UDFConv.class) - || isCastToIntFamily(udfClass) && arg0Type(expr).equals("string") - || isCastToFloatFamily(udfClass) && arg0Type(expr).equals("string") + || isCastToIntFamily(udfClass) && isStringFamily(arg0Type(expr)) + || isCastToFloatFamily(udfClass) && isStringFamily(arg0Type(expr)) || udfClass.equals(UDFToString.class) && (arg0Type(expr).equals("timestamp") || arg0Type(expr).equals("double") || arg0Type(expr).equals("float"))) { return true; } - } else if ((gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string")) + } else if ((gudf instanceof GenericUDFTimestamp && isStringFamily(arg0Type(expr))) /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because * of their complexity and generality. In the future, variations of these @@ -615,6 +637,16 @@ public class VectorizationContext { || gudf instanceof GenericUDFCase || gudf instanceof GenericUDFWhen) { return true; + } else if (gudf instanceof GenericUDFToChar && + (arg0Type(expr).equals("timestamp") + || arg0Type(expr).equals("double") + || arg0Type(expr).equals("float"))) { + return true; + } else if (gudf instanceof GenericUDFToVarchar && + (arg0Type(expr).equals("timestamp") + || arg0Type(expr).equals("double") + || arg0Type(expr).equals("float"))) { + return true; } return false; } @@ -721,27 +753,21 @@ public class VectorizationContext { private VectorExpression getConstantVectorExpression(Object constantValue, TypeInfo typeInfo, Mode mode) throws HiveException { - String type = typeInfo.getTypeName(); - String colVectorType = getNormalizedTypeName(type); + String typeName = typeInfo.getTypeName(); + VectorExpressionDescriptor.ArgumentType vectorArgType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(typeName); + if (vectorArgType == VectorExpressionDescriptor.ArgumentType.NONE) { + throw new HiveException("No vector argument type for type name " + typeName); + } int outCol = -1; if (mode == Mode.PROJECTION) { - outCol = ocm.allocateOutputColumn(colVectorType); + outCol = ocm.allocateOutputColumn(typeName); } if (constantValue == null) { - return new ConstantVectorExpression(outCol, type, true); - } else if (decimalTypePattern.matcher(type).matches()) { - VectorExpression ve = new ConstantVectorExpression(outCol, (Decimal128) constantValue); - ve.setOutputType(typeInfo.getTypeName()); - return ve; - } else if (type.equalsIgnoreCase("long") || type.equalsIgnoreCase("int") || - type.equalsIgnoreCase("short") || type.equalsIgnoreCase("byte")) { - return new ConstantVectorExpression(outCol, - ((Number) constantValue).longValue()); - } else if (type.equalsIgnoreCase("double") || type.equalsIgnoreCase("float")) { - return new ConstantVectorExpression(outCol, ((Number) constantValue).doubleValue()); - } else if (type.equalsIgnoreCase("string")) { - return new ConstantVectorExpression(outCol, ((String) constantValue).getBytes()); - } else if (type.equalsIgnoreCase("boolean")) { + return new ConstantVectorExpression(outCol, typeName, true); + } + + // Boolean is special case. + if (typeName.equalsIgnoreCase("boolean")) { if (mode == Mode.FILTER) { if (((Boolean) constantValue).booleanValue()) { return new FilterConstantBooleanVectorExpression(1); @@ -756,7 +782,26 @@ public class VectorizationContext { } } } - throw new HiveException("Unsupported constant type: "+type.toString()); + + switch (vectorArgType) { + case INT_FAMILY: + return new ConstantVectorExpression(outCol, ((Number) constantValue).longValue()); + case FLOAT_FAMILY: + return new ConstantVectorExpression(outCol, ((Number) constantValue).doubleValue()); + case DECIMAL: + VectorExpression ve = new ConstantVectorExpression(outCol, (Decimal128) constantValue); + // Set type name with decimal precision, scale, etc. + ve.setOutputType(typeName); + return ve; + case STRING: + return new ConstantVectorExpression(outCol, ((String) constantValue).getBytes()); + case CHAR: + return new ConstantVectorExpression(outCol, ((HiveChar) constantValue)); + case VARCHAR: + return new ConstantVectorExpression(outCol, ((HiveVarchar) constantValue)); + default: + throw new HiveException("Unsupported constant type: " + typeName); + } } /** @@ -799,7 +844,15 @@ public class VectorizationContext { builder.setMode(mode); for (int i = 0; i < numChildren; i++) { ExprNodeDesc child = childExpr.get(i); - builder.setArgumentType(i, child.getTypeString()); + String childTypeString = child.getTypeString(); + if (childTypeString == null) { + throw new HiveException("Null child type name string"); + } + String undecoratedTypeName = getUndecoratedName(childTypeString); + if (undecoratedTypeName == null) { + throw new HiveException("No match for type string " + childTypeString + " from undecorated type name method"); + } + builder.setArgumentType(i, undecoratedTypeName); if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeColumnDesc)) { builder.setInputExpressionType(i, InputExpressionType.COLUMN); } else if (child instanceof ExprNodeConstantDesc) { @@ -829,7 +882,11 @@ public class VectorizationContext { try { for (int i = 0; i < numChildren; i++) { ExprNodeDesc child = childExpr.get(i); - inputTypes[i] = VectorExpression.Type.getValue(child.getTypeInfo().getTypeName()); + String undecoratedName = getUndecoratedName(child.getTypeInfo().getTypeName()); + inputTypes[i] = VectorExpression.Type.getValue(undecoratedName); + if (inputTypes[i] == VectorExpression.Type.OTHER){ + throw new HiveException("No vector type for " + vectorClass.getSimpleName() + " argument #" + i + " type name " + undecoratedName); + } if (child instanceof ExprNodeGenericFuncDesc) { VectorExpression vChild = getVectorExpression(child, childrenMode); children.add(vChild); @@ -870,36 +927,71 @@ public class VectorizationContext { return Mode.PROJECTION; } + private String getNewInstanceArgumentString(Object [] args) { + if (args == null) { + return "arguments: NULL"; + } + ArrayList<String> argClasses = new ArrayList<String>(); + for (Object obj : args) { + argClasses.add(obj.getClass().getSimpleName()); + } + return "arguments: " + Arrays.toString(args) + ", argument classes: " + argClasses.toString(); + } + private VectorExpression instantiateExpression(Class<?> vclass, TypeInfo returnType, Object...args) throws HiveException { VectorExpression ve = null; Constructor<?> ctor = getConstructor(vclass); int numParams = ctor.getParameterTypes().length; int argsLength = (args == null) ? 0 : args.length; - try { - if (numParams == 0) { + if (numParams == 0) { + try { ve = (VectorExpression) ctor.newInstance(); - } else if (numParams == argsLength) { + } catch (Exception ex) { + throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " + + StringUtils.stringifyException(ex)); + } + } else if (numParams == argsLength) { + try { ve = (VectorExpression) ctor.newInstance(args); - } else if (numParams == argsLength + 1) { - // Additional argument is needed, which is the outputcolumn. + } catch (Exception ex) { + throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with " + getNewInstanceArgumentString(args) + ", exception: " + + StringUtils.stringifyException(ex)); + } + } else if (numParams == argsLength + 1) { + // Additional argument is needed, which is the outputcolumn. + Object [] newArgs = null; + try { String outType; // Special handling for decimal because decimal types need scale and precision parameter. // This special handling should be avoided by using returnType uniformly for all cases. if (returnType != null) { - outType = getNormalizedTypeName(returnType.getTypeName()).toLowerCase(); + outType = getNormalizedName(returnType.getTypeName()).toLowerCase(); + if (outType == null) { + throw new HiveException("No vector type for type name " + returnType); + } } else { outType = ((VectorExpression) vclass.newInstance()).getOutputType(); } int outputCol = ocm.allocateOutputColumn(outType); - Object [] newArgs = Arrays.copyOf(args, numParams); + newArgs = Arrays.copyOf(args, numParams); newArgs[numParams-1] = outputCol; + ve = (VectorExpression) ctor.newInstance(newArgs); ve.setOutputType(outType); + } catch (Exception ex) { + throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " + + StringUtils.stringifyException(ex)); + } + } + // Add maxLength parameter to UDFs that have CHAR or VARCHAR output. + if (ve instanceof TruncStringOutput) { + TruncStringOutput truncStringOutput = (TruncStringOutput) ve; + if (returnType instanceof BaseCharTypeInfo) { + BaseCharTypeInfo baseCharTypeInfo = (BaseCharTypeInfo) returnType; + truncStringOutput.setMaxLength(baseCharTypeInfo.getLength()); } - } catch (Exception ex) { - throw new HiveException("Could not instantiate " + vclass.getSimpleName(), ex); } return ve; } @@ -933,6 +1025,10 @@ public class VectorizationContext { } } else if (udf instanceof GenericUDFToDecimal) { return getCastToDecimal(childExpr, returnType); + } else if (udf instanceof GenericUDFToChar) { + return getCastToChar(childExpr, returnType); + } else if (udf instanceof GenericUDFToVarchar) { + return getCastToVarChar(childExpr, returnType); } // Now do a general lookup @@ -962,7 +1058,7 @@ public class VectorizationContext { inputColumns[i++] = ve.getOutputColumn(); } - int outColumn = ocm.allocateOutputColumn(getNormalizedTypeName(returnType.getTypeName())); + int outColumn = ocm.allocateOutputColumn(returnType.getTypeName()); VectorCoalesce vectorCoalesce = new VectorCoalesce(inputColumns, outColumn); vectorCoalesce.setOutputType(returnType.getTypeName()); vectorCoalesce.setChildExpressions(vectorChildren); @@ -989,7 +1085,7 @@ public class VectorizationContext { inputColumns[i++] = ve.getOutputColumn(); } - int outColumn = ocm.allocateOutputColumn(getNormalizedTypeName(returnType.getTypeName())); + int outColumn = ocm.allocateOutputColumn(returnType.getTypeName()); VectorElt vectorElt = new VectorElt(inputColumns, outColumn); vectorElt.setOutputType(returnType.getTypeName()); vectorElt.setChildExpressions(vectorChildren); @@ -1265,6 +1361,64 @@ public class VectorizationContext { throw new HiveException("Unhandled cast input type: " + inputType); } + private VectorExpression getCastToChar(List<ExprNodeDesc> childExpr, TypeInfo returnType) + throws HiveException { + ExprNodeDesc child = childExpr.get(0); + String inputType = childExpr.get(0).getTypeString(); + if (child instanceof ExprNodeConstantDesc) { + // Don't do constant folding here. Wait until the optimizer is changed to do it. + // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424. + return null; + } + if (inputType.equals("boolean")) { + // Boolean must come before the integer family. It's a special case. + return createVectorExpression(CastBooleanToCharViaLongToChar.class, childExpr, Mode.PROJECTION, null); + } else if (isIntFamily(inputType)) { + return createVectorExpression(CastLongToChar.class, childExpr, Mode.PROJECTION, null); + } else if (isDecimalFamily(inputType)) { + return createVectorExpression(CastDecimalToChar.class, childExpr, Mode.PROJECTION, returnType); + } else if (isDateFamily(inputType)) { + return createVectorExpression(CastDateToChar.class, childExpr, Mode.PROJECTION, returnType); + } else if (isStringFamily(inputType)) { + return createVectorExpression(CastStringGroupToChar.class, childExpr, Mode.PROJECTION, returnType); + } + + /* + * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF. + */ + + throw new HiveException("Unhandled cast input type: " + inputType); + } + + private VectorExpression getCastToVarChar(List<ExprNodeDesc> childExpr, TypeInfo returnType) + throws HiveException { + ExprNodeDesc child = childExpr.get(0); + String inputType = childExpr.get(0).getTypeString(); + if (child instanceof ExprNodeConstantDesc) { + // Don't do constant folding here. Wait until the optimizer is changed to do it. + // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424. + return null; + } + if (inputType.equals("boolean")) { + // Boolean must come before the integer family. It's a special case. + return createVectorExpression(CastBooleanToVarCharViaLongToVarChar.class, childExpr, Mode.PROJECTION, null); + } else if (isIntFamily(inputType)) { + return createVectorExpression(CastLongToVarChar.class, childExpr, Mode.PROJECTION, null); + } else if (isDecimalFamily(inputType)) { + return createVectorExpression(CastDecimalToVarChar.class, childExpr, Mode.PROJECTION, returnType); + } else if (isDateFamily(inputType)) { + return createVectorExpression(CastDateToVarChar.class, childExpr, Mode.PROJECTION, returnType); + } else if (isStringFamily(inputType)) { + return createVectorExpression(CastStringGroupToVarChar.class, childExpr, Mode.PROJECTION, returnType); + } + + /* + * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF. + */ + + throw new HiveException("Unhandled cast input type: " + inputType); + } + private VectorExpression getCastToDoubleExpression(Class<?> udf, List<ExprNodeDesc> childExpr, TypeInfo returnType) throws HiveException { ExprNodeDesc child = childExpr.get(0); @@ -1304,12 +1458,12 @@ public class VectorizationContext { return getConstantVectorExpression(null, TypeInfoFactory.booleanTypeInfo, Mode.PROJECTION); } // Long and double are handled using descriptors, string needs to be specially handled. - if (inputType.equals("string")) { + if (isStringFamily(inputType)) { // string casts to false if it is 0 characters long, otherwise true VectorExpression lenExpr = createVectorExpression(StringLength.class, childExpr, Mode.PROJECTION, null); - int outputCol = ocm.allocateOutputColumn("integer"); + int outputCol = ocm.allocateOutputColumn("Long"); VectorExpression lenToBoolExpr = new CastLongToBooleanViaLongToLong(lenExpr.getOutputColumn(), outputCol); lenToBoolExpr.setChildExpressions(new VectorExpression[] {lenExpr}); @@ -1411,6 +1565,14 @@ public class VectorizationContext { cl = FilterStringColumnBetween.class; } else if (colType.equals("string") && notKeywordPresent) { cl = FilterStringColumnNotBetween.class; + } else if (varcharTypePattern.matcher(colType).matches() && !notKeywordPresent) { + cl = FilterVarCharColumnBetween.class; + } else if (varcharTypePattern.matcher(colType).matches() && notKeywordPresent) { + cl = FilterVarCharColumnNotBetween.class; + } else if (charTypePattern.matcher(colType).matches() && !notKeywordPresent) { + cl = FilterCharColumnBetween.class; + } else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) { + cl = FilterCharColumnNotBetween.class; } else if (colType.equals("timestamp")) { // Get timestamp boundary values as longs instead of the expected strings @@ -1483,13 +1645,13 @@ public class VectorizationContext { // Allocate output column and get column number; int outputCol = -1; - String resultType = expr.getTypeInfo().getTypeName(); - String resultColVectorType = getNormalizedTypeName(resultType); + String resultTypeName = expr.getTypeInfo().getTypeName(); - outputCol = ocm.allocateOutputColumn(resultColVectorType); + outputCol = ocm.allocateOutputColumn(resultTypeName); // Make vectorized operator - VectorExpression ve = new VectorUDFAdaptor(expr, outputCol, resultColVectorType, argDescs); + String normalizedName = getNormalizedName(resultTypeName); + VectorExpression ve = new VectorUDFAdaptor(expr, outputCol, normalizedName, argDescs); // Set child expressions VectorExpression[] childVEs = null; @@ -1509,7 +1671,7 @@ public class VectorizationContext { } public static boolean isStringFamily(String resultType) { - return resultType.equalsIgnoreCase("string"); + return resultType.equalsIgnoreCase("string") || charVarcharTypePattern.matcher(resultType).matches(); } public static boolean isDatetimeFamily(String resultType) { @@ -1617,7 +1779,7 @@ public class VectorizationContext { "Non-constant argument not supported for vectorization."); } ExprNodeConstantDesc constExpr = (ExprNodeConstantDesc) expr; - if (constExpr.getTypeString().equals("string")) { + if (isStringFamily(constExpr.getTypeString())) { // create expression tree with type cast from string to timestamp ExprNodeGenericFuncDesc expr2 = new ExprNodeGenericFuncDesc(); @@ -1667,63 +1829,99 @@ public class VectorizationContext { } } - static String getNormalizedTypeName(String colType){ - String normalizedType = null; - if (colType.equalsIgnoreCase("Double") || colType.equalsIgnoreCase("Float")) { - normalizedType = "Double"; - } else if (colType.equalsIgnoreCase("String")) { - normalizedType = "String"; - } else if (decimalTypePattern.matcher(colType).matches()) { + static String getNormalizedName(String hiveTypeName) { + VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName); + switch (argType) { + case INT_FAMILY: + return "Long"; + case FLOAT_FAMILY: + return "Double"; + case DECIMAL: //Return the decimal type as is, it includes scale and precision. - normalizedType = colType; - } else { - normalizedType = "Long"; + return hiveTypeName; + case STRING: + return "String"; + case CHAR: + //Return the CHAR type as is, it includes maximum length + return hiveTypeName; + case VARCHAR: + //Return the VARCHAR type as is, it includes maximum length. + return hiveTypeName; + case DATE: + return "Date"; + case TIMESTAMP: + return "Timestamp"; + default: + return "None"; + } + } + + static String getUndecoratedName(String hiveTypeName) { + VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName); + switch (argType) { + case INT_FAMILY: + return "Long"; + case FLOAT_FAMILY: + return "Double"; + case DECIMAL: + return "Decimal"; + case STRING: + return "String"; + case CHAR: + return "Char"; + case VARCHAR: + return "VarChar"; + case DATE: + return "Date"; + case TIMESTAMP: + return "Timestamp"; + default: + return "None"; } - return normalizedType; } - static Object[][] aggregatesDefinition = { - {"min", "Long", VectorUDAFMinLong.class}, - {"min", "Double", VectorUDAFMinDouble.class}, - {"min", "String", VectorUDAFMinString.class}, - {"min", "Decimal",VectorUDAFMinDecimal.class}, - {"max", "Long", VectorUDAFMaxLong.class}, - {"max", "Double", VectorUDAFMaxDouble.class}, - {"max", "String", VectorUDAFMaxString.class}, - {"max", "Decimal",VectorUDAFMaxDecimal.class}, - {"count", null, VectorUDAFCountStar.class}, - {"count", "Long", VectorUDAFCount.class}, - {"count", "Double", VectorUDAFCount.class}, - {"count", "String", VectorUDAFCount.class}, - {"count", "Decimal",VectorUDAFCount.class}, - {"sum", "Long", VectorUDAFSumLong.class}, - {"sum", "Double", VectorUDAFSumDouble.class}, - {"sum", "Decimal",VectorUDAFSumDecimal.class}, - {"avg", "Long", VectorUDAFAvgLong.class}, - {"avg", "Double", VectorUDAFAvgDouble.class}, - {"avg", "Decimal",VectorUDAFAvgDecimal.class}, - {"variance", "Long", VectorUDAFVarPopLong.class}, - {"var_pop", "Long", VectorUDAFVarPopLong.class}, - {"variance", "Double", VectorUDAFVarPopDouble.class}, - {"var_pop", "Double", VectorUDAFVarPopDouble.class}, - {"variance", "Decimal",VectorUDAFVarPopDecimal.class}, - {"var_pop", "Decimal",VectorUDAFVarPopDecimal.class}, - {"var_samp", "Long", VectorUDAFVarSampLong.class}, - {"var_samp" , "Double", VectorUDAFVarSampDouble.class}, - {"var_samp" , "Decimal",VectorUDAFVarSampDecimal.class}, - {"std", "Long", VectorUDAFStdPopLong.class}, - {"stddev", "Long", VectorUDAFStdPopLong.class}, - {"stddev_pop","Long", VectorUDAFStdPopLong.class}, - {"std", "Double", VectorUDAFStdPopDouble.class}, - {"stddev", "Double", VectorUDAFStdPopDouble.class}, - {"stddev_pop","Double", VectorUDAFStdPopDouble.class}, - {"std", "Decimal",VectorUDAFStdPopDecimal.class}, - {"stddev", "Decimal",VectorUDAFStdPopDecimal.class}, - {"stddev_pop","Decimal",VectorUDAFStdPopDecimal.class}, - {"stddev_samp","Long", VectorUDAFStdSampLong.class}, - {"stddev_samp","Double",VectorUDAFStdSampDouble.class}, - {"stddev_samp","Decimal",VectorUDAFStdSampDecimal.class}, - }; + static ArrayList<AggregateDefinition> aggregatesDefinition = new ArrayList<AggregateDefinition>() {{ + add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFMinLong.class)); + add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFMinDouble.class)); + add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, VectorUDAFMinString.class)); + add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFMinDecimal.class)); + add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFMaxLong.class)); + add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFMaxDouble.class)); + add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, VectorUDAFMaxString.class)); + add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFMaxDecimal.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.NONE, VectorUDAFCountStar.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFCount.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFCount.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, VectorUDAFCount.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFCount.class)); + add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFSumLong.class)); + add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFSumDouble.class)); + add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFSumDecimal.class)); + add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFAvgLong.class)); + add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFAvgDouble.class)); + add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFAvgDecimal.class)); + add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFVarPopLong.class)); + add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFVarPopLong.class)); + add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFVarPopDouble.class)); + add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFVarPopDouble.class)); + add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFVarPopDecimal.class)); + add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFVarPopDecimal.class)); + add(new AggregateDefinition("var_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFVarSampLong.class)); + add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFVarSampDouble.class)); + add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFVarSampDecimal.class)); + add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFStdPopLong.class)); + add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFStdPopLong.class)); + add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFStdPopLong.class)); + add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFStdPopDouble.class)); + add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFStdPopDouble.class)); + add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFStdPopDouble.class)); + add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFStdPopDecimal.class)); + add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFStdPopDecimal.class)); + add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFStdPopDecimal.class)); + add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, VectorUDAFStdSampLong.class)); + add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, VectorUDAFStdSampDouble.class)); + add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.DECIMAL, VectorUDAFStdSampDecimal.class)); + }}; public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc) throws HiveException { @@ -1737,22 +1935,22 @@ public class VectorizationContext { } String aggregateName = desc.getGenericUDAFName(); - String inputType = null; + VectorExpressionDescriptor.ArgumentType inputType = VectorExpressionDescriptor.ArgumentType.NONE; if (paramDescList.size() > 0) { ExprNodeDesc inputExpr = paramDescList.get(0); - inputType = getNormalizedTypeName(inputExpr.getTypeString()); - if (decimalTypePattern.matcher(inputType).matches()) { - inputType = "Decimal"; + inputType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(inputExpr.getTypeString()); + if (inputType == VectorExpressionDescriptor.ArgumentType.NONE) { + throw new HiveException("No vector argument type for Hive type name " + inputExpr.getTypeString()); } } - for (Object[] aggDef : aggregatesDefinition) { - if (aggregateName.equalsIgnoreCase((String) aggDef[0]) && - ((aggDef[1] == null && inputType == null) || - (aggDef[1] != null && aggDef[1].equals(inputType)))) { - Class<? extends VectorAggregateExpression> aggClass = - (Class<? extends VectorAggregateExpression>) (aggDef[2]); + for (AggregateDefinition aggDef : aggregatesDefinition) { + if (aggregateName.equalsIgnoreCase(aggDef.getName()) && + ((aggDef.getType() == VectorExpressionDescriptor.ArgumentType.NONE && + inputType == VectorExpressionDescriptor.ArgumentType.NONE) || + (aggDef.getType().isSameTypeOrFamily(inputType)))) { + Class<? extends VectorAggregateExpression> aggClass = aggDef.getAggClass(); try { Constructor<? extends VectorAggregateExpression> ctor = @@ -1769,7 +1967,7 @@ public class VectorizationContext { } throw new HiveException("Vector aggregate not implemented: \"" + aggregateName + - "\" for type: \"" + inputType + ""); + "\" for type: \"" + inputType.name() + ""); } public Map<Integer, String> getOutputColumnTypeMap() { Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java (original) +++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java Fri Sep 5 19:15:44 2014 @@ -23,11 +23,16 @@ import java.sql.Timestamp; import java.util.LinkedList; import java.util.List; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -126,6 +131,8 @@ public class VectorizedBatchUtil { break; case BINARY: case STRING: + case CHAR: + case VARCHAR: cvList.add(new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE)); break; case DECIMAL: @@ -375,6 +382,51 @@ public class VectorizedBatchUtil { } } break; + case CHAR: { + BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i]; + if (writableCol != null) { + bcv.isNull[rowIndex] = false; + HiveChar colHiveChar = ((HiveCharWritable) writableCol).getHiveChar(); + byte[] bytes = colHiveChar.getStrippedValue().getBytes(); + + // We assume the CHAR maximum length was enforced when the object was created. + int length = bytes.length; + + int start = buffer.getLength(); + try { + // In vector mode, we store CHAR as unpadded. + buffer.write(bytes, 0, length); + } catch (IOException ioe) { + throw new IllegalStateException("bad write", ioe); + } + bcv.setRef(rowIndex, buffer.getData(), start, length); + } else { + setNullColIsNullValue(bcv, rowIndex); + } + } + break; + case VARCHAR: { + BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i]; + if (writableCol != null) { + bcv.isNull[rowIndex] = false; + HiveVarchar colHiveVarchar = ((HiveVarcharWritable) writableCol).getHiveVarchar(); + byte[] bytes = colHiveVarchar.getValue().getBytes(); + + // We assume the VARCHAR maximum length was enforced when the object was created. + int length = bytes.length; + + int start = buffer.getLength(); + try { + buffer.write(bytes, 0, length); + } catch (IOException ioe) { + throw new IllegalStateException("bad write", ioe); + } + bcv.setRef(rowIndex, buffer.getData(), start, length); + } else { + setNullColIsNullValue(bcv, rowIndex); + } + } + break; case DECIMAL: DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[off + i]; if (writableCol != null) { Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java (original) +++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java Fri Sep 5 19:15:44 2014 @@ -158,7 +158,10 @@ public class VectorizedColumnarSerDe ext serializeVectorStream.write(bytes, 0, bytes.length); } break; - case STRING: { + case STRING: + case CHAR: + case VARCHAR: { + // Is it correct to escape CHAR and VARCHAR? BytesColumnVector bcv = (BytesColumnVector) batch.cols[k]; LazyUtils.writeEscaped(serializeVectorStream, bcv.vector[rowIndex], bcv.start[rowIndex], Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original) +++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Fri Sep 5 19:15:44 2014 @@ -278,7 +278,7 @@ public class VectorizedRowBatchCtx { case PRIMITIVE: { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi; // Vectorization currently only supports the following data types: - // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, BINARY, STRING, TIMESTAMP, + // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, BINARY, STRING, CHAR, VARCHAR, TIMESTAMP, // DATE and DECIMAL switch (poi.getPrimitiveCategory()) { case BOOLEAN: @@ -296,6 +296,8 @@ public class VectorizedRowBatchCtx { break; case BINARY: case STRING: + case CHAR: + case VARCHAR: result.cols[j] = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); break; case DECIMAL: @@ -544,7 +546,9 @@ public class VectorizedRowBatchCtx { } break; - case STRING: { + case STRING: + case CHAR: + case VARCHAR: { BytesColumnVector bcv = (BytesColumnVector) batch.cols[colIndex]; String sVal = (String) value; if (sVal == null) { @@ -566,13 +570,17 @@ public class VectorizedRowBatchCtx { } } - private void addScratchColumnsToBatch(VectorizedRowBatch vrb) { + private void addScratchColumnsToBatch(VectorizedRowBatch vrb) throws HiveException { if (columnTypeMap != null && !columnTypeMap.isEmpty()) { int origNumCols = vrb.numCols; int newNumCols = vrb.cols.length+columnTypeMap.keySet().size(); vrb.cols = Arrays.copyOf(vrb.cols, newNumCols); for (int i = origNumCols; i < newNumCols; i++) { - vrb.cols[i] = allocateColumnVector(columnTypeMap.get(i), + String typeName = columnTypeMap.get(i); + if (typeName == null) { + throw new HiveException("No type found for column type entry " + i); + } + vrb.cols[i] = allocateColumnVector(typeName, VectorizedRowBatch.DEFAULT_SIZE); } vrb.numCols = vrb.cols.length; @@ -599,13 +607,17 @@ public class VectorizedRowBatchCtx { private ColumnVector allocateColumnVector(String type, int defaultSize) { if (type.equalsIgnoreCase("double")) { return new DoubleColumnVector(defaultSize); - } else if (type.equalsIgnoreCase("string")) { + } else if (VectorizationContext.isStringFamily(type)) { return new BytesColumnVector(defaultSize); } else if (VectorizationContext.decimalTypePattern.matcher(type).matches()){ int [] precisionScale = getScalePrecisionFromDecimalType(type); return new DecimalColumnVector(defaultSize, precisionScale[0], precisionScale[1]); - } else { + } else if (type.equalsIgnoreCase("long") || + type.equalsIgnoreCase("date") || + type.equalsIgnoreCase("timestamp")) { return new LongColumnVector(defaultSize); + } else { + throw new Error("Cannot allocate vector column for " + type); } } Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java (original) +++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java Fri Sep 5 19:15:44 2014 @@ -420,8 +420,8 @@ public abstract class AbstractFilterStri VectorExpressionDescriptor.Mode.FILTER) .setNumArguments(2) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("string"), - VectorExpressionDescriptor.ArgumentType.getType("string")) + VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, + VectorExpressionDescriptor.ArgumentType.STRING_FAMILY) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java (original) +++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java Fri Sep 5 19:15:44 2014 @@ -22,17 +22,18 @@ import org.apache.hadoop.hive.ql.exec.ve public class CastBooleanToStringViaLongToString extends LongToStringUnaryUDF { private static final long serialVersionUID = 1L; - private transient byte[] temp; // space to put date string private static final byte[][] dictionary = { {'F', 'A', 'L', 'S', 'E'}, {'T', 'R', 'U', 'E'} }; + public CastBooleanToStringViaLongToString(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + public CastBooleanToStringViaLongToString() { super(); - temp = new byte[8]; } - public CastBooleanToStringViaLongToString(int inputColumn, int outputColumn) { - super(inputColumn, outputColumn); - temp = new byte[8]; + protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { + outV.setVal(i, bytes, 0, length); } @Override @@ -41,6 +42,6 @@ public class CastBooleanToStringViaLongT /* 0 is false and 1 is true in the input vector, so a simple dictionary is used * with two entries. 0 references FALSE and 1 references TRUE in the dictionary. */ - outV.setVal(i, dictionary[(int) vector[i]], 0, dictionary[(int) vector[i]].length); + assign(outV, i, dictionary[(int) vector[i]], dictionary[(int) vector[i]].length); } -} +} \ No newline at end of file Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java (original) +++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java Fri Sep 5 19:15:44 2014 @@ -35,10 +35,15 @@ public class CastDateToString extends Lo super(inputColumn, outputColumn); } + // The assign method will be overridden for CHAR and VARCHAR. + protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { + outV.setVal(i, bytes, 0, length); + } + @Override protected void func(BytesColumnVector outV, long[] vector, int i) { dt.setTime(DateWritable.daysToMillis((int) vector[i])); byte[] temp = dt.toString().getBytes(); - outV.setVal(i, temp, 0, temp.length); + assign(outV, i, temp, temp.length); } } Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java?rev=1622763&r1=1622762&r2=1622763&view=diff ============================================================================== --- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java (original) +++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java Fri Sep 5 19:15:44 2014 @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
