Modified: hive/branches/llap/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java?rev=1631841&r1=1631840&r2=1631841&view=diff
==============================================================================
--- hive/branches/llap/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java (original)
+++ hive/branches/llap/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java Tue Oct 14 19:06:45 2014
@@ -233,12 +233,22 @@ public class TxnHandler {
     }
   }
 
-  public static ValidTxnList createValidTxnList(GetOpenTxnsResponse txns) {
+  /**
+   * Transform a {@link org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse} to a
+   * {@link org.apache.hadoop.hive.common.ValidTxnList}.
+   * @param txns txn list from the metastore
+   * @param currentTxn Current transaction that the user has open. If this is greater than 0 it
+   *                   will be removed from the exceptions list so that the user sees his own
+   *                   transaction as valid.
+   * @return a valid txn list.
+   */
+  public static ValidTxnList createValidTxnList(GetOpenTxnsResponse txns, long currentTxn) {
     long highWater = txns.getTxn_high_water_mark();
     Set<Long> open = txns.getOpen_txns();
-    long[] exceptions = new long[open.size()];
+    long[] exceptions = new long[open.size() - (currentTxn > 0 ? 1 : 0)];
    int i = 0;
    for(long txn: open) {
+      if (currentTxn > 0 && currentTxn == txn) continue;
      exceptions[i++] = txn;
    }
    return new ValidTxnListImpl(exceptions, highWater);
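A quick aside on the createValidTxnList(txns, currentTxn) change above: the sketch below is a hypothetical, self-contained rendering of just the exclusion loop, not part of the patch (the names ValidTxnSketch and openTxnsToExceptions are illustrative). Like the committed code, it assumes a positive currentTxn is present in the open set, which is why the exceptions array is sized one element short.

// Hypothetical sketch (not part of the patch): mirrors the exclusion loop in
// createValidTxnList(txns, currentTxn). As in the committed code, a positive
// currentTxn is assumed to be present in the open set, so the exceptions
// array is sized one element short and the caller's own txn is skipped.
import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;

public class ValidTxnSketch {

  static long[] openTxnsToExceptions(Set<Long> open, long currentTxn) {
    long[] exceptions = new long[open.size() - (currentTxn > 0 ? 1 : 0)];
    int i = 0;
    for (long txn : open) {
      if (currentTxn > 0 && currentTxn == txn) continue; // caller's own txn stays valid
      exceptions[i++] = txn;
    }
    return exceptions;
  }

  public static void main(String[] args) {
    Set<Long> open = new TreeSet<>(Arrays.asList(7L, 9L, 12L));
    // With currentTxn = 9 the caller's own transaction is dropped from the
    // exceptions, so the caller sees its own writes as valid: prints [7, 12].
    System.out.println(Arrays.toString(openTxnsToExceptions(open, 9L)));
    // With no open transaction (currentTxn = 0): prints [7, 9, 12].
    System.out.println(Arrays.toString(openTxnsToExceptions(open, 0L)));
  }
}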
Modified: hive/branches/llap/metastore/src/test/org/apache/hadoop/hive/metastore/IpAddressListener.java URL: http://svn.apache.org/viewvc/hive/branches/llap/metastore/src/test/org/apache/hadoop/hive/metastore/IpAddressListener.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/metastore/src/test/org/apache/hadoop/hive/metastore/IpAddressListener.java (original) +++ hive/branches/llap/metastore/src/test/org/apache/hadoop/hive/metastore/IpAddressListener.java Tue Oct 14 19:06:45 2014 @@ -47,15 +47,10 @@ public class IpAddressListener extends M super(config); } - private String getIpFromInetAddress(String addr) { - return addr.substring(addr.indexOf('/') + 1); - } - private void checkIpAddress() { try { - String localhostIp = InetAddress.getByName(LOCAL_HOST).toString(); - Assert.assertEquals(getIpFromInetAddress(localhostIp), - getIpFromInetAddress(HMSHandler.getIpAddress())); + String localhostIp = InetAddress.getByName(LOCAL_HOST).getHostAddress(); + Assert.assertEquals(localhostIp, HMSHandler.getIpAddress()); } catch (UnknownHostException e) { Assert.assertTrue("InetAddress.getLocalHost threw an exception: " + e.getMessage(), false); } Modified: hive/branches/llap/packaging/pom.xml URL: http://svn.apache.org/viewvc/hive/branches/llap/packaging/pom.xml?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/packaging/pom.xml (original) +++ hive/branches/llap/packaging/pom.xml Tue Oct 14 19:06:45 2014 @@ -158,6 +158,12 @@ </dependency> <dependency> <groupId>org.apache.hive</groupId> + <artifactId>hive-jdbc</artifactId> + <version>${project.version}</version> + <classifier>${hive.jdbc.driver.classifier}</classifier> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> <artifactId>hive-beeline</artifactId> <version>${project.version}</version> </dependency> Modified: hive/branches/llap/pom.xml URL: http://svn.apache.org/viewvc/hive/branches/llap/pom.xml?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/pom.xml (original) +++ hive/branches/llap/pom.xml Tue Oct 14 19:06:45 2014 @@ -107,7 +107,7 @@ <commons-exec.version>1.1</commons-exec.version> <commons-httpclient.version>3.0.1</commons-httpclient.version> <commons-io.version>2.4</commons-io.version> - <commons-lang.version>2.4</commons-lang.version> + <commons-lang.version>2.6</commons-lang.version> <commons-lang3.version>3.1</commons-lang3.version> <commons-logging.version>1.1.3</commons-logging.version> <commons-pool.version>1.5.4</commons-pool.version> @@ -117,7 +117,7 @@ <groovy.version>2.1.6</groovy.version> <hadoop-20.version>0.20.2</hadoop-20.version> <hadoop-20S.version>1.2.1</hadoop-20S.version> - <hadoop-23.version>2.4.0</hadoop-23.version> + <hadoop-23.version>2.5.0</hadoop-23.version> <hadoop.bin.path>${basedir}/${hive.path.to.root}/testutils/hadoop</hadoop.bin.path> <hbase.hadoop1.version>0.98.3-hadoop1</hbase.hadoop1.version> <hbase.hadoop2.version>0.98.3-hadoop2</hbase.hadoop2.version> @@ -139,6 +139,7 @@ <libfb303.version>0.9.0</libfb303.version> <libthrift.version>0.9.0</libthrift.version> <log4j.version>1.2.16</log4j.version> + <opencsv.version>2.3</opencsv.version> <mockito-all.version>1.9.5</mockito-all.version> <mina.version>2.0.0-M5</mina.version> <!--netty is not a direct dependency but due to a change @@ -152,7 +153,7 @@ 
<stax.version>1.0.1</stax.version> <slf4j.version>1.7.5</slf4j.version> <ST4.version>4.0.4</ST4.version> - <tez.version>0.5.0</tez.version> + <tez.version>0.5.1</tez.version> <super-csv.version>2.2.0</super-csv.version> <tempus-fugit.version>1.1</tempus-fugit.version> <snappy.version>0.2</snappy.version> @@ -970,6 +971,11 @@ <dependencies> <dependency> <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client</artifactId> + <version>${hadoop-20S.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-core</artifactId> <version>${hadoop-20S.version}</version> </dependency> @@ -1012,6 +1018,11 @@ <dependencies> <dependency> <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client</artifactId> + <version>${hadoop-23.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> <version>${hadoop-23.version}</version> </dependency> Modified: hive/branches/llap/ql/if/queryplan.thrift URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/if/queryplan.thrift?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/if/queryplan.thrift (original) +++ hive/branches/llap/ql/if/queryplan.thrift Tue Oct 14 19:06:45 2014 @@ -57,6 +57,9 @@ enum OperatorType { MUX, DEMUX, EVENT, + ORCFILEMERGE, + RCFILEMERGE, + MERGEJOIN, } struct Operator { Modified: hive/branches/llap/ql/pom.xml URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/pom.xml?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/pom.xml (original) +++ hive/branches/llap/ql/pom.xml Tue Oct 14 19:06:45 2014 @@ -28,6 +28,7 @@ <name>Hive Query Language</name> <properties> + <calcite.version>0.9.1-incubating-SNAPSHOT</calcite.version> <hive.path.to.root>..</hive.path.to.root> </properties> @@ -187,6 +188,42 @@ <version>${datanucleus-core.version}</version> </dependency> <dependency> + <groupId>org.apache.calcite</groupId> + <artifactId>calcite-core</artifactId> + <version>${calcite.version}</version> + <exclusions> + <!-- hsqldb interferes with the use of derby as the default db + in hive's use of datanucleus. + --> + <exclusion> + <groupId>org.hsqldb</groupId> + <artifactId>hsqldb</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.calcite</groupId> + <artifactId>calcite-avatica</artifactId> + <version>${calcite.version}</version> + <exclusions> + <!-- hsqldb interferes with the use of derby as the default db + in hive's use of datanucleus. 
+ --> + <exclusion> + <groupId>org.hsqldb</groupId> + <artifactId>hsqldb</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> <version>${guava.version}</version> @@ -216,6 +253,11 @@ <artifactId>stax-api</artifactId> <version>${stax.version}</version> </dependency> + <dependency> + <groupId>net.sf.opencsv</groupId> + <artifactId>opencsv</artifactId> + <version>${opencsv.version}</version> + </dependency> <!-- test intra-project --> <!-- test inter-project --> <dependency> @@ -577,6 +619,7 @@ <include>com.twitter:parquet-hadoop-bundle</include> <include>org.apache.thrift:libthrift</include> <include>commons-lang:commons-lang</include> + <include>org.apache.commons:commons-lang3</include> <include>org.jodd:jodd-core</include> <include>org.json:json</include> <include>org.apache.avro:avro</include> @@ -594,6 +637,7 @@ <include>org.codehaus.jackson:jackson-core-asl</include> <include>org.codehaus.jackson:jackson-mapper-asl</include> <include>com.google.guava:guava</include> + <include>net.sf.opencsv:opencsv</include> </includes> </artifactSet> <relocations> Modified: hive/branches/llap/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp (original) +++ hive/branches/llap/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp Tue Oct 14 19:06:45 2014 @@ -52,7 +52,10 @@ int _kOperatorTypeValues[] = { OperatorType::PTF, OperatorType::MUX, OperatorType::DEMUX, - OperatorType::EVENT + OperatorType::EVENT, + OperatorType::ORCFILEMERGE, + OperatorType::RCFILEMERGE, + OperatorType::MERGEJOIN }; const char* _kOperatorTypeNames[] = { "JOIN", @@ -76,9 +79,12 @@ const char* _kOperatorTypeNames[] = { "PTF", "MUX", "DEMUX", - "EVENT" + "EVENT", + "ORCFILEMERGE", + "RCFILEMERGE", + "MERGEJOIN" }; -const std::map<int, const char*> _OperatorType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(22, _kOperatorTypeValues, _kOperatorTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); +const std::map<int, const char*> _OperatorType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(25, _kOperatorTypeValues, _kOperatorTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); int _kTaskTypeValues[] = { TaskType::MAP, Modified: hive/branches/llap/ql/src/gen/thrift/gen-cpp/queryplan_types.h URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/gen/thrift/gen-cpp/queryplan_types.h?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/gen/thrift/gen-cpp/queryplan_types.h (original) +++ hive/branches/llap/ql/src/gen/thrift/gen-cpp/queryplan_types.h Tue Oct 14 19:06:45 2014 @@ -57,7 +57,10 @@ struct OperatorType { PTF = 18, MUX = 19, DEMUX = 20, - EVENT = 21 + EVENT = 21, + ORCFILEMERGE = 22, + RCFILEMERGE = 23, + MERGEJOIN = 24 }; }; Modified: hive/branches/llap/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java?rev=1631841&r1=1631840&r2=1631841&view=diff 
============================================================================== --- hive/branches/llap/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java (original) +++ hive/branches/llap/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java Tue Oct 14 19:06:45 2014 @@ -7,6 +7,10 @@ package org.apache.hadoop.hive.ql.plan.api; +import java.util.Map; +import java.util.HashMap; +import org.apache.thrift.TEnum; + public enum OperatorType implements org.apache.thrift.TEnum { JOIN(0), MAPJOIN(1), @@ -31,7 +35,8 @@ public enum OperatorType implements org. DEMUX(20), EVENT(21), ORCFILEMERGE(22), - RCFILEMERGE(23); + RCFILEMERGE(23), + MERGEJOIN(24); private final int value; @@ -100,6 +105,8 @@ public enum OperatorType implements org. return ORCFILEMERGE; case 23: return RCFILEMERGE; + case 24: + return MERGEJOIN; default: return null; } Modified: hive/branches/llap/ql/src/gen/thrift/gen-php/Types.php URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/gen/thrift/gen-php/Types.php?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/gen/thrift/gen-php/Types.php (original) +++ hive/branches/llap/ql/src/gen/thrift/gen-php/Types.php Tue Oct 14 19:06:45 2014 @@ -57,6 +57,9 @@ final class OperatorType { const MUX = 19; const DEMUX = 20; const EVENT = 21; + const ORCFILEMERGE = 22; + const RCFILEMERGE = 23; + const MERGEJOIN = 24; static public $__names = array( 0 => 'JOIN', 1 => 'MAPJOIN', @@ -80,6 +83,9 @@ final class OperatorType { 19 => 'MUX', 20 => 'DEMUX', 21 => 'EVENT', + 22 => 'ORCFILEMERGE', + 23 => 'RCFILEMERGE', + 24 => 'MERGEJOIN', ); } Modified: hive/branches/llap/ql/src/gen/thrift/gen-py/queryplan/ttypes.py URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/gen/thrift/gen-py/queryplan/ttypes.py?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/gen/thrift/gen-py/queryplan/ttypes.py (original) +++ hive/branches/llap/ql/src/gen/thrift/gen-py/queryplan/ttypes.py Tue Oct 14 19:06:45 2014 @@ -67,6 +67,9 @@ class OperatorType: MUX = 19 DEMUX = 20 EVENT = 21 + ORCFILEMERGE = 22 + RCFILEMERGE = 23 + MERGEJOIN = 24 _VALUES_TO_NAMES = { 0: "JOIN", @@ -91,6 +94,9 @@ class OperatorType: 19: "MUX", 20: "DEMUX", 21: "EVENT", + 22: "ORCFILEMERGE", + 23: "RCFILEMERGE", + 24: "MERGEJOIN", } _NAMES_TO_VALUES = { @@ -116,6 +122,9 @@ class OperatorType: "MUX": 19, "DEMUX": 20, "EVENT": 21, + "ORCFILEMERGE": 22, + "RCFILEMERGE": 23, + "MERGEJOIN": 24, } class TaskType: Modified: hive/branches/llap/ql/src/gen/thrift/gen-rb/queryplan_types.rb URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/gen/thrift/gen-rb/queryplan_types.rb?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/gen/thrift/gen-rb/queryplan_types.rb (original) +++ hive/branches/llap/ql/src/gen/thrift/gen-rb/queryplan_types.rb Tue Oct 14 19:06:45 2014 @@ -43,8 +43,11 @@ module OperatorType MUX = 19 DEMUX = 20 EVENT = 21 - VALUE_MAP = {0 => "JOIN", 1 => "MAPJOIN", 2 => "EXTRACT", 3 => "FILTER", 4 => "FORWARD", 5 => "GROUPBY", 6 => "LIMIT", 7 => "SCRIPT", 8 => "SELECT", 9 => "TABLESCAN", 10 => "FILESINK", 11 => "REDUCESINK", 12 => "UNION", 13 => "UDTF", 14 => "LATERALVIEWJOIN", 15 => "LATERALVIEWFORWARD", 16 => "HASHTABLESINK", 17 => "HASHTABLEDUMMY", 18 => "PTF", 
19 => "MUX", 20 => "DEMUX", 21 => "EVENT"} - VALID_VALUES = Set.new([JOIN, MAPJOIN, EXTRACT, FILTER, FORWARD, GROUPBY, LIMIT, SCRIPT, SELECT, TABLESCAN, FILESINK, REDUCESINK, UNION, UDTF, LATERALVIEWJOIN, LATERALVIEWFORWARD, HASHTABLESINK, HASHTABLEDUMMY, PTF, MUX, DEMUX, EVENT]).freeze + ORCFILEMERGE = 22 + RCFILEMERGE = 23 + MERGEJOIN = 24 + VALUE_MAP = {0 => "JOIN", 1 => "MAPJOIN", 2 => "EXTRACT", 3 => "FILTER", 4 => "FORWARD", 5 => "GROUPBY", 6 => "LIMIT", 7 => "SCRIPT", 8 => "SELECT", 9 => "TABLESCAN", 10 => "FILESINK", 11 => "REDUCESINK", 12 => "UNION", 13 => "UDTF", 14 => "LATERALVIEWJOIN", 15 => "LATERALVIEWFORWARD", 16 => "HASHTABLESINK", 17 => "HASHTABLEDUMMY", 18 => "PTF", 19 => "MUX", 20 => "DEMUX", 21 => "EVENT", 22 => "ORCFILEMERGE", 23 => "RCFILEMERGE", 24 => "MERGEJOIN"} + VALID_VALUES = Set.new([JOIN, MAPJOIN, EXTRACT, FILTER, FORWARD, GROUPBY, LIMIT, SCRIPT, SELECT, TABLESCAN, FILESINK, REDUCESINK, UNION, UDTF, LATERALVIEWJOIN, LATERALVIEWFORWARD, HASHTABLESINK, HASHTABLEDUMMY, PTF, MUX, DEMUX, EVENT, ORCFILEMERGE, RCFILEMERGE, MERGEJOIN]).freeze end module TaskType Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/Driver.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Tue Oct 14 19:06:45 2014 @@ -390,6 +390,14 @@ public class Driver implements CommandPr tree = ParseUtils.findRootNonNullToken(tree); perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARSE); + // Initialize the transaction manager. This must be done before analyze is called. Also + // record the valid transactions for this query. We have to do this at compile time + // because we use the information in planning the query. Also, + // we want to record it at this point so that users see data valid at the point that they + // submit the query. + SessionState.get().initTxnMgr(conf); + recordValidTxns(); + perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ANALYZE); BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree); List<HiveSemanticAnalyzerHook> saHooks = @@ -401,6 +409,8 @@ public class Driver implements CommandPr HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl(); hookCtx.setConf(conf); hookCtx.setUserName(userName); + hookCtx.setIpAddress(SessionState.get().getUserIpAddress()); + hookCtx.setCommand(command); for (HiveSemanticAnalyzerHook hook : saHooks) { tree = hook.preAnalyze(hookCtx, tree); } @@ -422,7 +432,8 @@ public class Driver implements CommandPr sem.validate(); perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ANALYZE); - plan = new QueryPlan(command, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN), queryId); + plan = new QueryPlan(command, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN), queryId, + SessionState.get().getCommandType()); String queryStr = plan.getQueryStr(); conf.setVar(HiveConf.ConfVars.HIVEQUERYSTRING, queryStr); @@ -503,9 +514,11 @@ public class Driver implements CommandPr // get mapping of tables to columns used ColumnAccessInfo colAccessInfo = sem.getColumnAccessInfo(); // colAccessInfo is set only in case of SemanticAnalyzer - Map<String, List<String>> tab2Cols = colAccessInfo != null ? colAccessInfo + Map<String, List<String>> selectTab2Cols = colAccessInfo != null ? 
colAccessInfo .getTableToColumnAccessMap() : null; - doAuthorizationV2(ss, op, inputs, outputs, command, tab2Cols); + Map<String, List<String>> updateTab2Cols = sem.getUpdateColumnAccessInfo() != null ? + sem.getUpdateColumnAccessInfo().getTableToColumnAccessMap() : null; + doAuthorizationV2(ss, op, inputs, outputs, command, selectTab2Cols, updateTab2Cols); return; } if (op == null) { @@ -696,7 +709,13 @@ public class Driver implements CommandPr } private static void doAuthorizationV2(SessionState ss, HiveOperation op, HashSet<ReadEntity> inputs, - HashSet<WriteEntity> outputs, String command, Map<String, List<String>> tab2cols) throws HiveException { + HashSet<WriteEntity> outputs, String command, Map<String, List<String>> tab2cols, + Map<String, List<String>> updateTab2Cols) throws HiveException { + + /* comment for reviewers -> updateTab2Cols needed to be separate from tab2cols because if I + pass tab2cols to getHivePrivObjects for the output case it will trip up insert/selects, + since the insert will get passed the columns from the select. + */ HiveAuthzContext.Builder authzContextBuilder = new HiveAuthzContext.Builder(); authzContextBuilder.setUserIpAddress(ss.getUserIpAddress()); @@ -704,7 +723,7 @@ public class Driver implements CommandPr HiveOperationType hiveOpType = getHiveOperationType(op); List<HivePrivilegeObject> inputsHObjs = getHivePrivObjects(inputs, tab2cols); - List<HivePrivilegeObject> outputHObjs = getHivePrivObjects(outputs, null); + List<HivePrivilegeObject> outputHObjs = getHivePrivObjects(outputs, updateTab2Cols); ss.getAuthorizerV2().checkPrivileges(hiveOpType, inputsHObjs, outputHObjs, authzContextBuilder.build()); } @@ -730,12 +749,6 @@ public class Driver implements CommandPr //do not authorize temporary uris continue; } - if (privObject instanceof ReadEntity && ((ReadEntity)privObject).isUpdateOrDelete()) { - // Skip this one, as we don't want to check select privileges for the table we're reading - // for an update or delete. - continue; - } - //support for authorization on partitions needs to be added String dbname = null; String objName = null; @@ -868,28 +881,24 @@ public class Driver implements CommandPr // Write the current set of valid transactions into the conf file so that it can be read by // the input format. - private int recordValidTxns() { - try { - ValidTxnList txns = SessionState.get().getTxnMgr().getValidTxns(); - String txnStr = txns.toString(); - conf.set(ValidTxnList.VALID_TXNS_KEY, txnStr); - LOG.debug("Encoding valid txns info " + txnStr); - return 0; - } catch (LockException e) { - errorMessage = "FAILED: Error in determing valid transactions: " + e.getMessage(); - SQLState = ErrorMsg.findSQLState(e.getMessage()); - downstreamError = e; - console.printError(errorMessage, "\n" - + org.apache.hadoop.util.StringUtils.stringifyException(e)); - return 10; - } + private void recordValidTxns() throws LockException { + ValidTxnList txns = SessionState.get().getTxnMgr().getValidTxns(); + String txnStr = txns.toString(); + conf.set(ValidTxnList.VALID_TXNS_KEY, txnStr); + LOG.debug("Encoding valid txns info " + txnStr); + // TODO I think when we switch to cross query transactions we need to keep this list in + // session state rather than agressively encoding it in the conf like this. We can let the + // TableScanOperators then encode it in the conf before calling the input formats. } /** * Acquire read and write locks needed by the statement. The list of objects to be locked are - * obtained from he inputs and outputs populated by the compiler. 
The lock acuisition scheme is + * obtained from the inputs and outputs populated by the compiler. The lock acuisition scheme is * pretty simple. If all the locks cannot be obtained, error out. Deadlock is avoided by making * sure that the locks are lexicographically sorted. + * + * This method also records the list of valid transactions. This must be done after any + * transactions have been opened and locks acquired. **/ private int acquireLocksAndOpenTxn() { PerfLogger perfLogger = PerfLogger.getPerfLogger(); @@ -925,6 +934,9 @@ public class Driver implements CommandPr desc.setTransactionId(txnId); } } + + // TODO Once we move to cross query transactions we need to add the open transaction to + // our list of valid transactions. We don't have a way to do that right now. } txnMgr.acquireLocks(plan, ctx, userFromUGI); @@ -1106,11 +1118,6 @@ public class Driver implements CommandPr SessionState ss = SessionState.get(); try { ckLock = checkConcurrency(); - try { - ss.initTxnMgr(conf); - } catch (LockException e) { - throw new SemanticException(e.getMessage(), e); - } } catch (SemanticException e) { errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage(); SQLState = ErrorMsg.findSQLState(e.getMessage()); @@ -1119,11 +1126,8 @@ public class Driver implements CommandPr + org.apache.hadoop.util.StringUtils.stringifyException(e)); return createProcessorResponse(10); } - int ret = recordValidTxns(); - if (ret != 0) { - return createProcessorResponse(ret); - } + int ret; if (!alreadyCompiled) { ret = compileInternal(command); if (ret != 0) { Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java Tue Oct 14 19:06:45 2014 @@ -84,7 +84,8 @@ public enum ErrorMsg { INVALID_PATH(10027, "Invalid path"), ILLEGAL_PATH(10028, "Path is not legal"), INVALID_NUMERICAL_CONSTANT(10029, "Invalid numerical constant"), - INVALID_ARRAYINDEX_CONSTANT(10030, "Non-constant expressions for array indexes not supported"), + INVALID_ARRAYINDEX_TYPE(10030, + "Not proper type for index of ARRAY. 
Currently, only integer type is supported"), INVALID_MAPINDEX_CONSTANT(10031, "Non-constant expression for map indexes not supported"), INVALID_MAPINDEX_TYPE(10032, "MAP key type does not match index expression type"), NON_COLLECTION_TYPE(10033, "[] not valid on non-collection types"), @@ -416,6 +417,10 @@ public enum ErrorMsg { "that implements AcidOutputFormat while transaction manager that supports ACID is in use"), VALUES_TABLE_CONSTRUCTOR_NOT_SUPPORTED(10296, "Values clause with table constructor not yet supported"), + ACID_OP_ON_NONACID_TABLE(10297, "Attempt to do update or delete on table {0} that does not use " + + "an AcidOutputFormat or is not bucketed", true), + ACID_NO_SORTED_BUCKETS(10298, "ACID insert, update, delete not supported on tables that are " + + "sorted, table {0}", true), //========================== 20000 range starts here ========================// SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."), Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java Tue Oct 14 19:06:45 2014 @@ -104,16 +104,14 @@ public class QueryPlan implements Serial private QueryProperties queryProperties; private transient Long queryStartTime; + private String operationName; public QueryPlan() { this.reducerTimeStatsPerJobList = new ArrayList<ReducerTimeStatsPerJob>(); } - public QueryPlan(String queryString, BaseSemanticAnalyzer sem, Long startTime) { - this(queryString, sem, startTime, null); - } - - public QueryPlan(String queryString, BaseSemanticAnalyzer sem, Long startTime, String queryId) { + public QueryPlan(String queryString, BaseSemanticAnalyzer sem, Long startTime, String queryId, + String operationName) { this.queryString = queryString; rootTasks = new ArrayList<Task<? 
extends Serializable>>(); @@ -134,6 +132,7 @@ public class QueryPlan implements Serial query.putToQueryAttributes("queryString", this.queryString); queryProperties = sem.getQueryProperties(); queryStartTime = startTime; + this.operationName = operationName; } public String getQueryStr() { @@ -786,4 +785,8 @@ public class QueryPlan implements Serial public void setQueryStartTime(Long queryStartTime) { this.queryStartTime = queryStartTime; } + + public String getOperationName() { + return operationName; + } } Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java Tue Oct 14 19:06:45 2014 @@ -48,12 +48,37 @@ public class QueryProperties { boolean mapJoinRemoved = false; boolean hasMapGroupBy = false; + private int noOfJoins = 0; + private int noOfOuterJoins = 0; + private boolean hasLateralViews; + + private boolean multiDestQuery; + private boolean filterWithSubQuery; + public boolean hasJoin() { - return hasJoin; + return (noOfJoins > 0); } - public void setHasJoin(boolean hasJoin) { - this.hasJoin = hasJoin; + public void incrementJoinCount(boolean outerJoin) { + noOfJoins++; + if (outerJoin) + noOfOuterJoins++; + } + + public int getJoinCount() { + return noOfJoins; + } + + public int getOuterJoinCount() { + return noOfOuterJoins; + } + + public void setHasLateralViews(boolean hasLateralViews) { + this.hasLateralViews = hasLateralViews; + } + + public boolean hasLateralViews() { + return hasLateralViews; } public boolean hasGroupBy() { @@ -144,6 +169,22 @@ public class QueryProperties { this.hasMapGroupBy = hasMapGroupBy; } + public boolean hasMultiDestQuery() { + return this.multiDestQuery; + } + + public void setMultiDestQuery(boolean multiDestQuery) { + this.multiDestQuery = multiDestQuery; + } + + public void setFilterWithSubQuery(boolean filterWithSubQuery) { + this.filterWithSubQuery = filterWithSubQuery; + } + + public boolean hasFilterWithSubQuery() { + return this.filterWithSubQuery; + } + public void clear() { hasJoin = false; hasGroupBy = false; @@ -160,5 +201,11 @@ public class QueryProperties { hasClusterBy = false; mapJoinRemoved = false; hasMapGroupBy = false; + + noOfJoins = 0; + noOfOuterJoins = 0; + + multiDestQuery = false; + filterWithSubQuery = false; } } Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java Tue Oct 14 19:06:45 2014 @@ -61,13 +61,13 @@ public abstract class AbstractMapJoinOpe @Override @SuppressWarnings("unchecked") protected void initializeOp(Configuration hconf) throws HiveException { - int tagLen = conf.getTagLength(); - - joinKeys = new List[tagLen]; - - JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), NOTSKIPBIGTABLE); 
- joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys, - inputObjInspectors,NOTSKIPBIGTABLE, tagLen); + if (conf.getGenJoinKeys()) { + int tagLen = conf.getTagLength(); + joinKeys = new List[tagLen]; + JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), NOTSKIPBIGTABLE); + joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys, + inputObjInspectors,NOTSKIPBIGTABLE, tagLen); + } super.initializeOp(hconf); Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/AppMasterEventOperator.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/AppMasterEventOperator.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/AppMasterEventOperator.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/AppMasterEventOperator.java Tue Oct 14 19:06:45 2014 @@ -44,10 +44,10 @@ import org.apache.tez.runtime.api.events @SuppressWarnings({ "deprecation", "serial" }) public class AppMasterEventOperator extends Operator<AppMasterEventDesc> { - private transient Serializer serializer; - private transient DataOutputBuffer buffer; - private transient boolean hasReachedMaxSize = false; - private transient long MAX_SIZE; + protected transient Serializer serializer; + protected transient DataOutputBuffer buffer; + protected transient boolean hasReachedMaxSize = false; + protected transient long MAX_SIZE; @Override public void initializeOp(Configuration hconf) throws HiveException { @@ -57,12 +57,9 @@ public class AppMasterEventOperator exte initDataBuffer(false); } - private void initDataBuffer(boolean skipPruning) throws HiveException { + protected void initDataBuffer(boolean skipPruning) throws HiveException { buffer = new DataOutputBuffer(); try { - // where does this go to? 
- buffer.writeUTF(((TezContext) TezContext.get()).getTezProcessorContext().getTaskVertexName()); - // add any other header info getConf().writeEventHeader(buffer); Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java Tue Oct 14 19:06:45 2014 @@ -323,7 +323,6 @@ public abstract class CommonJoinOperator @Override public void startGroup() throws HiveException { - LOG.trace("Join: Starting new group"); newGroupStarted = true; for (AbstractRowContainer<List<Object>> alw : storage) { alw.clearRows(); @@ -632,8 +631,6 @@ public abstract class CommonJoinOperator */ @Override public void endGroup() throws HiveException { - LOG.trace("Join Op: endGroup called: numValues=" + numAliases); - checkAndGenObject(); } @@ -719,7 +716,6 @@ public abstract class CommonJoinOperator if (noOuterJoin) { if (alw.rowCount() == 0) { - LOG.trace("No data for alias=" + i); return; } else if (alw.rowCount() > 1) { mayHasMoreThanOne = true; @@ -776,7 +772,6 @@ public abstract class CommonJoinOperator */ @Override public void closeOp(boolean abort) throws HiveException { - LOG.trace("Join Op close"); for (AbstractRowContainer<List<Object>> alw : storage) { if (alw != null) { alw.clearRows(); // clean up the temp files Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Tue Oct 14 19:06:45 2014 @@ -2278,7 +2278,12 @@ public class DDLTask extends Task<DDLWor Set<String> funcs = null; if (showFuncs.getPattern() != null) { LOG.info("pattern: " + showFuncs.getPattern()); - funcs = FunctionRegistry.getFunctionNames(showFuncs.getPattern()); + if (showFuncs.getIsLikePattern()) { + funcs = FunctionRegistry.getFunctionNamesByLikePattern(showFuncs.getPattern()); + } else { + console.printInfo("SHOW FUNCTIONS is deprecated, please use SHOW FUNCTIONS LIKE instead."); + funcs = FunctionRegistry.getFunctionNames(showFuncs.getPattern()); + } LOG.info("results : " + funcs.size()); } else { funcs = FunctionRegistry.getFunctionNames(); @@ -3275,19 +3280,21 @@ public class DDLTask extends Task<DDLWor } Table oldTbl = tbl.copy(); + List<FieldSchema> oldCols = (part == null ? tbl.getCols() : part.getCols()); + StorageDescriptor sd = (part == null ? 
tbl.getTTable().getSd() : part.getTPartition().getSd()); if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAME) { tbl.setDbName(Utilities.getDatabaseName(alterTbl.getNewName())); tbl.setTableName(Utilities.getTableName(alterTbl.getNewName())); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCOLS) { List<FieldSchema> newCols = alterTbl.getNewCols(); - List<FieldSchema> oldCols = tbl.getCols(); - if (tbl.getSerializationLib().equals( + String serializationLib = sd.getSerdeInfo().getSerializationLib(); + if (serializationLib.equals( "org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) { console .printInfo("Replacing columns for columnsetSerDe and changing to LazySimpleSerDe"); - tbl.setSerializationLib(LazySimpleSerDe.class.getName()); - tbl.getTTable().getSd().setCols(newCols); + sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName()); + sd.setCols(newCols); } else { // make sure the columns does not already exist Iterator<FieldSchema> iterNewCols = newCols.iterator(); @@ -3303,10 +3310,9 @@ public class DDLTask extends Task<DDLWor } oldCols.add(newCol); } - tbl.getTTable().getSd().setCols(oldCols); + sd.setCols(oldCols); } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAMECOLUMN) { - List<FieldSchema> oldCols = tbl.getCols(); List<FieldSchema> newCols = new ArrayList<FieldSchema>(); Iterator<FieldSchema> iterOldCols = oldCols.iterator(); String oldName = alterTbl.getOldColName(); @@ -3367,24 +3373,24 @@ public class DDLTask extends Task<DDLWor newCols.add(position, column); } - tbl.getTTable().getSd().setCols(newCols); - + sd.setCols(newCols); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.REPLACECOLS) { // change SerDe to LazySimpleSerDe if it is columnsetSerDe - if (tbl.getSerializationLib().equals( + String serializationLib = sd.getSerdeInfo().getSerializationLib(); + if (serializationLib.equals( "org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) { console .printInfo("Replacing columns for columnsetSerDe and changing to LazySimpleSerDe"); - tbl.setSerializationLib(LazySimpleSerDe.class.getName()); - } else if (!tbl.getSerializationLib().equals( + sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName()); + } else if (!serializationLib.equals( MetadataTypedColumnsetSerDe.class.getName()) - && !tbl.getSerializationLib().equals(LazySimpleSerDe.class.getName()) - && !tbl.getSerializationLib().equals(ColumnarSerDe.class.getName()) - && !tbl.getSerializationLib().equals(DynamicSerDe.class.getName()) - && !tbl.getSerializationLib().equals(ParquetHiveSerDe.class.getName())) { + && !serializationLib.equals(LazySimpleSerDe.class.getName()) + && !serializationLib.equals(ColumnarSerDe.class.getName()) + && !serializationLib.equals(DynamicSerDe.class.getName()) + && !serializationLib.equals(ParquetHiveSerDe.class.getName())) { throw new HiveException(ErrorMsg.CANNOT_REPLACE_COLUMNS, alterTbl.getOldName()); } - tbl.getTTable().getSd().setCols(alterTbl.getNewCols()); + sd.setCols(alterTbl.getNewCols()); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDPROPS) { tbl.getTTable().getParameters().putAll(alterTbl.getProps()); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.DROPPROPS) { @@ -3393,47 +3399,26 @@ public class DDLTask extends Task<DDLWor tbl.getTTable().getParameters().remove(keyItr.next()); } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDEPROPS) { - if (part != null) { - part.getTPartition().getSd().getSerdeInfo().getParameters().putAll( - 
alterTbl.getProps()); - } else { - tbl.getTTable().getSd().getSerdeInfo().getParameters().putAll( - alterTbl.getProps()); - } + sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps()); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDE) { String serdeName = alterTbl.getSerdeName(); + sd.getSerdeInfo().setSerializationLib(serdeName); + if ((alterTbl.getProps() != null) && (alterTbl.getProps().size() > 0)) { + sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps()); + } if (part != null) { - part.getTPartition().getSd().getSerdeInfo().setSerializationLib(serdeName); - if ((alterTbl.getProps() != null) && (alterTbl.getProps().size() > 0)) { - part.getTPartition().getSd().getSerdeInfo().getParameters().putAll( - alterTbl.getProps()); - } part.getTPartition().getSd().setCols(part.getTPartition().getSd().getCols()); } else { - tbl.setSerializationLib(alterTbl.getSerdeName()); - if ((alterTbl.getProps() != null) && (alterTbl.getProps().size() > 0)) { - tbl.getTTable().getSd().getSerdeInfo().getParameters().putAll( - alterTbl.getProps()); - } if (!Table.hasMetastoreBasedSchema(conf, serdeName)) { tbl.setFields(Hive.getFieldsFromDeserializer(tbl.getTableName(), tbl. getDeserializer())); } } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDFILEFORMAT) { - if(part != null) { - part.getTPartition().getSd().setInputFormat(alterTbl.getInputFormat()); - part.getTPartition().getSd().setOutputFormat(alterTbl.getOutputFormat()); - if (alterTbl.getSerdeName() != null) { - part.getTPartition().getSd().getSerdeInfo().setSerializationLib( - alterTbl.getSerdeName()); - } - } else { - tbl.getTTable().getSd().setInputFormat(alterTbl.getInputFormat()); - tbl.getTTable().getSd().setOutputFormat(alterTbl.getOutputFormat()); - if (alterTbl.getSerdeName() != null) { - tbl.setSerializationLib(alterTbl.getSerdeName()); - } + sd.setInputFormat(alterTbl.getInputFormat()); + sd.setOutputFormat(alterTbl.getOutputFormat()); + if (alterTbl.getSerdeName() != null) { + sd.getSerdeInfo().setSerializationLib(alterTbl.getSerdeName()); } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ALTERPROTECTMODE) { boolean protectModeEnable = alterTbl.isProtectModeEnable(); @@ -3463,8 +3448,6 @@ public class DDLTask extends Task<DDLWor .getColumnNamesFromSortCols(alterTbl.getSortColumns())); } - StorageDescriptor sd = part == null ? 
tbl.getTTable().getSd() : part.getTPartition().getSd(); - if (alterTbl.isTurnOffSorting()) { sd.setSortCols(new ArrayList<Order>()); } else if (alterTbl.getNumberBuckets() == -1) { @@ -3485,11 +3468,7 @@ public class DDLTask extends Task<DDLWor || locUri.getScheme().trim().equals("")) { throw new HiveException(ErrorMsg.BAD_LOCATION_VALUE, newLocation); } - if (part != null) { - part.setLocation(newLocation); - } else { - tbl.setDataLocation(new Path(locUri)); - } + sd.setLocation(newLocation); } catch (URISyntaxException e) { throw new HiveException(e); } @@ -3689,7 +3668,7 @@ public class DDLTask extends Task<DDLWor } // drop the table - db.dropTable(dropTbl.getTableName()); + db.dropTable(dropTbl.getTableName(), dropTbl.getIfPurge()); if (tbl != null) { // We have already locked the table in DDLSemanticAnalyzer, don't do it again here work.getOutputs().add(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK)); @@ -4233,7 +4212,7 @@ public class DDLTask extends Task<DDLWor String statVal = props.get(stat); if (statVal != null && Long.parseLong(statVal) > 0) { statsPresent = true; - props.put(statVal, "0"); + props.put(stat, "0"); props.put(StatsSetupConst.COLUMN_STATS_ACCURATE, "false"); } } Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/DummyStoreOperator.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/DummyStoreOperator.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/DummyStoreOperator.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/DummyStoreOperator.java Tue Oct 14 19:06:45 2014 @@ -65,7 +65,7 @@ import org.apache.hadoop.hive.serde2.obj */ public class DummyStoreOperator extends Operator<DummyStoreDesc> implements Serializable { - private transient InspectableObject result; + protected transient InspectableObject result; public DummyStoreOperator() { super(); Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java Tue Oct 14 19:06:45 2014 @@ -45,6 +45,7 @@ public class ExprNodeGenericFuncEvaluato transient ExprNodeEvaluator[] children; transient GenericUDF.DeferredObject[] deferredChildren; transient boolean isEager; + transient boolean isConstant = false; /** * Class to allow deferred evaluation for GenericUDF. 
@@ -124,7 +125,10 @@ public class ExprNodeGenericFuncEvaluato if (context != null) { context.setup(genericUDF); } - return outputOI = genericUDF.initializeAndFoldConstants(childrenOIs); + outputOI = genericUDF.initializeAndFoldConstants(childrenOIs); + isConstant = ObjectInspectorUtils.isConstantObjectInspector(outputOI) + && isDeterministic(); + return outputOI; } @Override @@ -154,12 +158,11 @@ public class ExprNodeGenericFuncEvaluato @Override protected Object _evaluate(Object row, int version) throws HiveException { - rowObject = row; - if (ObjectInspectorUtils.isConstantObjectInspector(outputOI) && - isDeterministic()) { + if (isConstant) { // The output of this UDF is constant, so don't even bother evaluating. - return ((ConstantObjectInspector)outputOI).getWritableConstantValue(); + return ((ConstantObjectInspector) outputOI).getWritableConstantValue(); } + rowObject = row; for (int i = 0; i < deferredChildren.length; i++) { deferredChildren[i].prepare(version); } Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java Tue Oct 14 19:06:45 2014 @@ -27,6 +27,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import org.apache.commons.lang3.StringEscapeUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -34,10 +35,9 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; -import org.apache.hadoop.hive.ql.exec.FooterBuffer; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveRecordReader; @@ -48,7 +48,6 @@ import org.apache.hadoop.hive.ql.plan.Fe import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; -import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; @@ -60,11 +59,8 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; 
import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.InputFormat; @@ -82,6 +78,9 @@ public class FetchOperator implements Se static Log LOG = LogFactory.getLog(FetchOperator.class.getName()); static LogHelper console = new LogHelper(LOG); + public static final String FETCH_OPERATOR_DIRECTORY_LIST = + "hive.complete.dir.list"; + private boolean isNativeTable; private FetchWork work; protected Operator<?> operator; // operator tree for processing row further (option) @@ -166,7 +165,7 @@ public class FetchOperator implements Se private void setupExecContext() { if (hasVC || work.getSplitSample() != null) { - context = new ExecMapperContext(); + context = new ExecMapperContext(job); if (operator != null) { operator.setExecContext(context); } @@ -353,6 +352,7 @@ public class FetchOperator implements Se } return; } else { + setFetchOperatorContext(job, work.getPartDir()); iterPath = work.getPartDir().iterator(); iterPartDesc = work.getPartDesc().iterator(); } @@ -381,6 +381,30 @@ public class FetchOperator implements Se } /** + * Set context for this fetch operator in to the jobconf. + * This helps InputFormats make decisions based on the scope of the complete + * operation. + * @param conf the configuration to modify + * @param partDirs the list of partition directories + */ + static void setFetchOperatorContext(JobConf conf, + ArrayList<Path> partDirs) { + if (partDirs != null) { + StringBuilder buff = new StringBuilder(); + boolean first = true; + for(Path p: partDirs) { + if (first) { + first = false; + } else { + buff.append('\t'); + } + buff.append(StringEscapeUtils.escapeJava(p.toString())); + } + conf.set(FETCH_OPERATOR_DIRECTORY_LIST, buff.toString()); + } + } + + /** * A cache of Object Inspector Settable Properties. */ private static Map<ObjectInspector, Boolean> oiSettableProperties = new HashMap<ObjectInspector, Boolean>(); @@ -749,7 +773,8 @@ public class FetchOperator implements Se */ private FileStatus[] listStatusUnderPath(FileSystem fs, Path p) throws IOException { boolean recursive = HiveConf.getBoolVar(job, HiveConf.ConfVars.HADOOPMAPREDINPUTDIRRECURSIVE); - if (!recursive) { + // If this is in acid format always read it recursively regardless of what the jobconf says. 
+ if (!recursive && !AcidUtils.isAcid(p, job)) { return fs.listStatus(p); } List<FileStatus> results = new ArrayList<FileStatus>(); Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java Tue Oct 14 19:06:45 2014 @@ -29,6 +29,8 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -38,13 +40,13 @@ import org.apache.hadoop.hive.common.Sta import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.io.AcidUtils; -import org.apache.hadoop.hive.ql.io.RecordUpdater; -import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.HivePartitioner; import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat; +import org.apache.hadoop.hive.ql.io.RecordUpdater; +import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.HiveFatalException; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; @@ -72,14 +74,16 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.ReflectionUtils; -import com.google.common.collect.Lists; - /** * File Sink operator implementation. 
**/ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements Serializable { + public static final Log LOG = LogFactory.getLog(FileSinkOperator.class); + private static final boolean isInfoEnabled = LOG.isInfoEnabled(); + private static final boolean isDebugEnabled = LOG.isDebugEnabled(); + protected transient HashMap<String, FSPaths> valToPaths; protected transient int numDynParts; protected transient List<String> dpColNames; @@ -101,10 +105,6 @@ public class FileSinkOperator extends Te protected transient boolean isCollectRWStats; private transient FSPaths prevFsp; private transient FSPaths fpaths; - private transient ObjectInspector keyOI; - private transient List<Object> keyWritables; - private transient List<String> keys; - private transient int numKeyColToRead; private StructField recIdField; // field to find record identifier in private StructField bucketField; // field bucket is in in record id private StructObjectInspector recIdInspector; // OI for inspecting record id @@ -131,9 +131,6 @@ public class FileSinkOperator extends Te int acidLastBucket = -1; int acidFileOffset = -1; - public FSPaths() { - } - public FSPaths(Path specPath) { tmpPath = Utilities.toTempPath(specPath); taskOutputTempPath = Utilities.toTaskTempPath(specPath); @@ -141,7 +138,9 @@ public class FileSinkOperator extends Te finalPaths = new Path[numFiles]; outWriters = new RecordWriter[numFiles]; updaters = new RecordUpdater[numFiles]; - LOG.debug("Created slots for " + numFiles); + if (isDebugEnabled) { + LOG.debug("Created slots for " + numFiles); + } stat = new Stat(); } @@ -326,7 +325,6 @@ public class FileSinkOperator extends Te parent = Utilities.toTempPath(conf.getDirName()); statsCollectRawDataSize = conf.isStatsCollectRawDataSize(); statsFromRecordWriter = new boolean[numFiles]; - serializer = (Serializer) conf.getTableInfo().getDeserializerClass().newInstance(); serializer.initialize(null, conf.getTableInfo().getProperties()); outputClass = serializer.getSerializedClass(); @@ -363,20 +361,6 @@ public class FileSinkOperator extends Te lbSetup(); } - int numPart = 0; - int numBuck = 0; - if (conf.getPartitionCols() != null && !conf.getPartitionCols().isEmpty()) { - numPart = conf.getPartitionCols().size(); - } - - // bucket number will exists only in PARTITION_BUCKET_SORTED mode - if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) { - numBuck = 1; - } - numKeyColToRead = numPart + numBuck; - keys = Lists.newArrayListWithCapacity(numKeyColToRead); - keyWritables = Lists.newArrayListWithCapacity(numKeyColToRead); - if (!bDynParts) { fsp = new FSPaths(specPath); @@ -423,7 +407,8 @@ public class FileSinkOperator extends Te this.dpColNames = dpCtx.getDPColNames(); this.maxPartitions = dpCtx.getMaxPartitionsPerNode(); - assert numDynParts == dpColNames.size() : "number of dynamic paritions should be the same as the size of DP mapping"; + assert numDynParts == dpColNames.size() + : "number of dynamic paritions should be the same as the size of DP mapping"; if (dpColNames != null && dpColNames.size() > 0) { this.bDynParts = true; @@ -441,6 +426,9 @@ public class FileSinkOperator extends Te newFieldsOI.add(sf.getFieldObjectInspector()); newFieldsName.add(sf.getFieldName()); this.dpStartCol++; + } else { + // once we found the start column for partition column we are done + break; } } assert newFieldsOI.size() > 0 : "new Fields ObjectInspector is empty"; @@ -457,11 +445,15 @@ public class FileSinkOperator extends Te Set<Integer> seenBuckets = new HashSet<Integer>(); for 
(int idx = 0; idx < totalFiles; idx++) { if (this.getExecContext() != null && this.getExecContext().getFileId() != null) { - LOG.info("replace taskId from execContext "); + if (isInfoEnabled) { + LOG.info("replace taskId from execContext "); + } taskId = Utilities.replaceTaskIdFromFilename(taskId, this.getExecContext().getFileId()); - LOG.info("new taskId: FS " + taskId); + if (isInfoEnabled) { + LOG.info("new taskId: FS " + taskId); + } assert !multiFileSpray; assert totalFiles == 1; @@ -515,9 +507,13 @@ public class FileSinkOperator extends Te try { if (isNativeTable) { fsp.finalPaths[filesIdx] = fsp.getFinalPath(taskId, fsp.tmpPath, null); - LOG.info("Final Path: FS " + fsp.finalPaths[filesIdx]); + if (isInfoEnabled) { + LOG.info("Final Path: FS " + fsp.finalPaths[filesIdx]); + } fsp.outPaths[filesIdx] = fsp.getTaskOutPath(taskId); - LOG.info("Writing to temp file: FS " + fsp.outPaths[filesIdx]); + if (isInfoEnabled) { + LOG.info("Writing to temp file: FS " + fsp.outPaths[filesIdx]); + } } else { fsp.finalPaths[filesIdx] = fsp.outPaths[filesIdx] = specPath; } @@ -532,7 +528,9 @@ public class FileSinkOperator extends Te fsp.finalPaths[filesIdx] = fsp.getFinalPath(taskId, fsp.tmpPath, extension); } - LOG.info("New Final Path: FS " + fsp.finalPaths[filesIdx]); + if (isInfoEnabled) { + LOG.info("New Final Path: FS " + fsp.finalPaths[filesIdx]); + } if (isNativeTable) { // in recent hadoop versions, use deleteOnExit to clean tmp files. @@ -604,14 +602,22 @@ public class FileSinkOperator extends Te updateProgress(); // if DP is enabled, get the final output writers and prepare the real output row - assert inputObjInspectors[0].getCategory() == ObjectInspector.Category.STRUCT : "input object inspector is not struct"; + assert inputObjInspectors[0].getCategory() == ObjectInspector.Category.STRUCT + : "input object inspector is not struct"; if (bDynParts) { + + // we need to read bucket number which is the last column in value (after partition columns) + if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) { + numDynParts += 1; + } + // copy the DP column values from the input row to dpVals dpVals.clear(); dpWritables.clear(); - ObjectInspectorUtils.partialCopyToStandardObject(dpWritables, row, dpStartCol, numDynParts, - (StructObjectInspector) inputObjInspectors[0], ObjectInspectorCopyOption.WRITABLE); + ObjectInspectorUtils.partialCopyToStandardObject(dpWritables, row, dpStartCol,numDynParts, + (StructObjectInspector) inputObjInspectors[0],ObjectInspectorCopyOption.WRITABLE); + // get a set of RecordWriter based on the DP column values // pass the null value along to the escaping process to determine what the dir should be for (Object o : dpWritables) { @@ -621,16 +627,11 @@ public class FileSinkOperator extends Te dpVals.add(o.toString()); } } - // use SubStructObjectInspector to serialize the non-partitioning columns in the input row - recordValue = serializer.serialize(row, subSetOI); - // when dynamic partition sorting is not used, the DPSortState will be NONE - // in which we will fall back to old method of file system path creation - // i.e, having as many record writers as distinct values in partition column - if (conf.getDpSortState().equals(DPSortState.NONE)) { - fpaths = getDynOutPaths(dpVals, lbDirName); - } + fpaths = getDynOutPaths(dpVals, lbDirName); + // use SubStructObjectInspector to serialize the non-partitioning columns in the input row + recordValue = serializer.serialize(row, subSetOI); } else { if (lbDirName != null) { fpaths = 
lookupListBucketingPaths(lbDirName); @@ -686,8 +687,10 @@ public class FileSinkOperator extends Te fpaths.updaters[++fpaths.acidFileOffset] = HiveFileFormatUtils.getAcidRecordUpdater( jc, conf.getTableInfo(), bucketNum, conf, fpaths.outPaths[fpaths.acidFileOffset], rowInspector, reporter, 0); - LOG.debug("Created updater for bucket number " + bucketNum + " using file " + - fpaths.outPaths[fpaths.acidFileOffset]); + if (isDebugEnabled) { + LOG.debug("Created updater for bucket number " + bucketNum + " using file " + + fpaths.outPaths[fpaths.acidFileOffset]); + } } if (conf.getWriteType() == AcidUtils.Operation.UPDATE) { @@ -834,10 +837,8 @@ public class FileSinkOperator extends Te if (dpDir != null) { dpDir = appendToSource(lbDirName, dpDir); pathKey = dpDir; - int numericBucketNum = 0; if(conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) { String buckNum = row.get(row.size() - 1); - numericBucketNum = Integer.valueOf(buckNum); taskId = Utilities.replaceTaskIdFromFilename(Utilities.getTaskId(hconf), buckNum); pathKey = appendToSource(taskId, dpDir); } @@ -918,26 +919,6 @@ public class FileSinkOperator extends Te } @Override - public void startGroup() throws HiveException { - if (!conf.getDpSortState().equals(DPSortState.NONE)) { - keyOI = getGroupKeyObjectInspector(); - keys.clear(); - keyWritables.clear(); - ObjectInspectorUtils.partialCopyToStandardObject(keyWritables, getGroupKeyObject(), 0, - numKeyColToRead, (StructObjectInspector) keyOI, ObjectInspectorCopyOption.WRITABLE); - - for (Object o : keyWritables) { - if (o == null || o.toString().length() == 0) { - keys.add(dpCtx.getDefaultPartitionName()); - } else { - keys.add(o.toString()); - } - } - fpaths = getDynOutPaths(keys, null); - } - } - - @Override public void closeOp(boolean abort) throws HiveException { if (!bDynParts && !filesCreated) { Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java Tue Oct 14 19:06:45 2014 @@ -76,7 +76,7 @@ public class FilterOperator extends Oper statsMap.put(Counter.FILTERED, filtered_count); statsMap.put(Counter.PASSED, passed_count); conditionInspector = null; - ioContext = IOContext.get(); + ioContext = IOContext.get(hconf.get(Utilities.INPUT_NAME)); } catch (Throwable e) { throw new HiveException(e); } Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Tue Oct 14 19:06:45 2014 @@ -639,6 +639,14 @@ public final class FunctionRegistry { } } + public static String getNormalizedFunctionName(String fn) { + // Does the same thing as getFunctionInfo, except for getting the function info. 
+ fn = fn.toLowerCase(); + return (FunctionUtils.isQualifiedFunctionName(fn) || mFunctions.get(fn) != null) ? fn + : FunctionUtils.qualifyFunctionName( + fn, SessionState.get().getCurrentDatabase().toLowerCase()); + } + private static <T extends CommonFunctionInfo> T getFunctionInfo( Map<String, T> mFunctions, String functionName) { functionName = functionName.toLowerCase(); @@ -724,6 +732,34 @@ public final class FunctionRegistry { } /** + * Returns a set of registered function names that match the given pattern. + * This is used for the CLI command "SHOW FUNCTIONS LIKE 'regular expression';" + * + * @param funcPatternStr + * regular expression for the function names of interest + * @return set of strings containing function names + */ + public static Set<String> getFunctionNamesByLikePattern(String funcPatternStr) { + Set<String> funcNames = new TreeSet<String>(); + Set<String> allFuncs = getFunctionNames(true); + String[] subpatterns = funcPatternStr.trim().split("\\|"); + for (String subpattern : subpatterns) { + subpattern = "(?i)" + subpattern.replaceAll("\\*", ".*"); + try { + Pattern patternObj = Pattern.compile(subpattern); + for (String funcName : allFuncs) { + if (patternObj.matcher(funcName).matches()) { + funcNames.add(funcName); + } + } + } catch (PatternSyntaxException e) { + continue; + } + } + return funcNames; + } + + /** * Returns the set of synonyms of the supplied function. * * @param funcName @@ -861,15 +897,7 @@ public final class FunctionRegistry { TypeInfoUtils.getCharacterLengthForType(b)); return TypeInfoFactory.getVarcharTypeInfo(maxLength); case DECIMAL: - int prec1 = HiveDecimalUtils.getPrecisionForType(a); - int prec2 = HiveDecimalUtils.getPrecisionForType(b); - int scale1 = HiveDecimalUtils.getScaleForType(a); - int scale2 = HiveDecimalUtils.getScaleForType(b); - int intPart = Math.max(prec1 - scale1, prec2 - scale2); - int decPart = Math.max(scale1, scale2); - int prec = Math.min(intPart + decPart, HiveDecimal.MAX_PRECISION); - int scale = Math.min(decPart, HiveDecimal.MAX_PRECISION - intPart); - return TypeInfoFactory.getDecimalTypeInfo(prec, scale); + return HiveDecimalUtils.getDecimalTypeForPrimitiveCategories(a, b); default: // Type doesn't require any qualifiers.
return TypeInfoFactory.getPrimitiveTypeInfo( @@ -903,15 +931,15 @@ public final class FunctionRegistry { (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b,PrimitiveCategory.STRING); } - if (FunctionRegistry.implicitConvertable(a, b)) { + if (FunctionRegistry.implicitConvertible(a, b)) { return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcB); } - if (FunctionRegistry.implicitConvertable(b, a)) { + if (FunctionRegistry.implicitConvertible(b, a)) { return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcA); } for (PrimitiveCategory t : numericTypeList) { - if (FunctionRegistry.implicitConvertable(pcA, t) - && FunctionRegistry.implicitConvertable(pcB, t)) { + if (FunctionRegistry.implicitConvertible(pcA, t) + && FunctionRegistry.implicitConvertible(pcB, t)) { return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, t); } } @@ -955,8 +983,8 @@ public final class FunctionRegistry { } for (PrimitiveCategory t : numericTypeList) { - if (FunctionRegistry.implicitConvertable(pcA, t) - && FunctionRegistry.implicitConvertable(pcB, t)) { + if (FunctionRegistry.implicitConvertible(pcA, t) + && FunctionRegistry.implicitConvertible(pcB, t)) { return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, t); } } @@ -1007,7 +1035,7 @@ public final class FunctionRegistry { return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, commonCat); } - public static boolean implicitConvertable(PrimitiveCategory from, PrimitiveCategory to) { + public static boolean implicitConvertible(PrimitiveCategory from, PrimitiveCategory to) { if (from == to) { return true; } @@ -1058,7 +1086,7 @@ public final class FunctionRegistry { * Returns whether it is possible to implicitly convert an object of Class * from to Class to. */ - public static boolean implicitConvertable(TypeInfo from, TypeInfo to) { + public static boolean implicitConvertible(TypeInfo from, TypeInfo to) { if (from.equals(to)) { return true; } @@ -1067,9 +1095,9 @@ public final class FunctionRegistry { // 2 TypeInfos from the same qualified type (varchar, decimal) should still be // seen as equivalent. if (from.getCategory() == Category.PRIMITIVE && to.getCategory() == Category.PRIMITIVE) { - return implicitConvertable( - ((PrimitiveTypeInfo)from).getPrimitiveCategory(), - ((PrimitiveTypeInfo)to).getPrimitiveCategory()); + return implicitConvertible( + ((PrimitiveTypeInfo) from).getPrimitiveCategory(), + ((PrimitiveTypeInfo) to).getPrimitiveCategory()); } return false; } @@ -1305,7 +1333,7 @@ public final class FunctionRegistry { // but there is a conversion cost. 
return 1; } - if (!exact && implicitConvertable(argumentPassed, argumentAccepted)) { + if (!exact && implicitConvertible(argumentPassed, argumentAccepted)) { return 1; } Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Tue Oct 14 19:06:45 2014 @@ -77,6 +77,7 @@ public class GroupByOperator extends Ope private static final Log LOG = LogFactory.getLog(GroupByOperator.class .getName()); + private static final boolean isTraceEnabled = LOG.isTraceEnabled(); private static final long serialVersionUID = 1L; private static final int NUMROWSESTIMATESIZE = 1000; @@ -101,6 +102,7 @@ public class GroupByOperator extends Ope transient ExprNodeEvaluator unionExprEval = null; transient GenericUDAFEvaluator[] aggregationEvaluators; + transient boolean[] estimableAggregationEvaluators; protected transient ArrayList<ObjectInspector> objectInspectors; transient ArrayList<String> fieldNames; @@ -442,10 +444,10 @@ public class GroupByOperator extends Ope estimateRowSize(); } - private static final int javaObjectOverHead = 64; - private static final int javaHashEntryOverHead = 64; - private static final int javaSizePrimitiveType = 16; - private static final int javaSizeUnknownType = 256; + public static final int javaObjectOverHead = 64; + public static final int javaHashEntryOverHead = 64; + public static final int javaSizePrimitiveType = 16; + public static final int javaSizeUnknownType = 256; /** * The size of the element at position 'pos' is returned, if possible. If the @@ -557,11 +559,13 @@ public class GroupByOperator extends Ope // Go over all the aggregation classes and and get the size of the fields of // fixed length. Keep track of the variable length // fields in these aggregation classes. 
+ estimableAggregationEvaluators = new boolean[aggregationEvaluators.length]; for (int i = 0; i < aggregationEvaluators.length; i++) { fixedRowSize += javaObjectOverHead; AggregationBuffer agg = aggregationEvaluators[i].getNewAggregationBuffer(); if (GenericUDAFEvaluator.isEstimable(agg)) { + estimableAggregationEvaluators[i] = true; continue; } Field[] fArr = ObjectInspectorUtils.getDeclaredNonStaticFields(agg.getClass()); @@ -765,10 +769,12 @@ public class GroupByOperator extends Ope flushHashTable(true); hashAggr = false; } else { - LOG.trace("Hash Aggr Enabled: #hash table = " + numRowsHashTbl - + " #total = " + numRowsInput + " reduction = " + 1.0 - * (numRowsHashTbl / numRowsInput) + " minReduction = " - + minReductionHashAggr); + if (isTraceEnabled) { + LOG.trace("Hash Aggr Enabled: #hash table = " + numRowsHashTbl + + " #total = " + numRowsInput + " reduction = " + 1.0 + * (numRowsHashTbl / numRowsInput) + " minReduction = " + + minReductionHashAggr); + } } } } @@ -952,7 +958,7 @@ public class GroupByOperator extends Ope AggregationBuffer[] aggs = hashAggregations.get(newKeys); for (int i = 0; i < aggs.length; i++) { AggregationBuffer agg = aggs[i]; - if (GenericUDAFEvaluator.isEstimable(agg)) { + if (estimableAggregationEvaluators[i]) { totalVariableSize += ((GenericUDAFEvaluator.AbstractAggregationBuffer)agg).estimate(); continue; } @@ -966,8 +972,10 @@ public class GroupByOperator extends Ope // Update the number of entries that can fit in the hash table numEntriesHashTable = (int) (maxHashTblMemory / (fixedRowSize + (totalVariableSize / numEntriesVarSize))); - LOG.trace("Hash Aggr: #hash table = " + numEntries - + " #max in hash table = " + numEntriesHashTable); + if (isTraceEnabled) { + LOG.trace("Hash Aggr: #hash table = " + numEntries + + " #max in hash table = " + numEntriesHashTable); + } } // flush if necessary Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java Tue Oct 14 19:06:45 2014 @@ -171,8 +171,14 @@ public class MapJoinOperator extends Abs private void loadHashTable() throws HiveException { - if (this.getExecContext().getLocalWork() == null - || !this.getExecContext().getLocalWork().getInputFileChangeSensitive()) { + if ((this.getExecContext() == null) + || (this.getExecContext().getLocalWork() == null) + || (this.getExecContext().getLocalWork().getInputFileChangeSensitive() == false) + ) { + /* + * This early-exit criteria is not applicable if the local work is sensitive to input file changes. + * But the check does no apply if there is no local work, or if this is a reducer vertex (execContext is null). 
+ */ if (hashTblInitedOnce) { return; } else { @@ -313,8 +319,8 @@ public class MapJoinOperator extends Abs tableContainer.dumpMetrics(); } } - if ((this.getExecContext().getLocalWork() != null - && this.getExecContext().getLocalWork().getInputFileChangeSensitive()) + if ((this.getExecContext() != null) && (this.getExecContext().getLocalWork() != null) + && (this.getExecContext().getLocalWork().getInputFileChangeSensitive()) && mapJoinTables != null) { for (MapJoinTableContainer tableContainer : mapJoinTables) { if (tableContainer != null) { Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java Tue Oct 14 19:06:45 2014 @@ -33,9 +33,10 @@ import org.apache.hadoop.conf.Configurat import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; -import org.apache.hadoop.hive.ql.io.IOContext; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; +import org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor; import org.apache.hadoop.hive.ql.io.RecordIdentifier; +import org.apache.hadoop.hive.ql.io.IOContext; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.MapWork; @@ -181,7 +182,7 @@ public class MapOperator extends Operato PartitionDesc pd = ctx.partDesc; TableDesc td = pd.getTableDesc(); - + MapOpCtx opCtx = new MapOpCtx(); // Use table properties in case of unpartitioned tables, // and the union of table properties and partition properties, with partition @@ -205,42 +206,42 @@ public class MapOperator extends Operato opCtx.partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter( partRawRowObjectInspector, opCtx.tblRawRowObjectInspector); - + // Next check if this table has partitions and if so // get the list of partition names as well as allocate // the serdes for the partition columns String pcols = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); - + if (pcols != null && pcols.length() > 0) { String[] partKeys = pcols.trim().split("/"); String pcolTypes = overlayedProps .getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES); String[] partKeyTypes = pcolTypes.trim().split(":"); - + if (partKeys.length > partKeyTypes.length) { throw new HiveException("Internal error : partKeys length, " +partKeys.length + " greater than partKeyTypes length, " + partKeyTypes.length); } - + List<String> partNames = new ArrayList<String>(partKeys.length); Object[] partValues = new Object[partKeys.length]; List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>(partKeys.length); - + for (int i = 0; i < partKeys.length; i++) { String key = partKeys[i]; partNames.add(key); ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector (TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i])); - + // Partitions do not exist for this table if (partSpec == null) { // for partitionless table, initialize partValue to null partValues[i] = null; } else { - partValues[i] = + 
partValues[i] = ObjectInspectorConverters. getConverter(PrimitiveObjectInspectorFactory. - javaStringObjectInspector, oi).convert(partSpec.get(key)); + javaStringObjectInspector, oi).convert(partSpec.get(key)); } partObjectInspectors.add(oi); } @@ -337,13 +338,8 @@ public class MapOperator extends Operato return tableDescOI; } - private boolean isPartitioned(PartitionDesc pd) { - return pd.getPartSpec() != null && !pd.getPartSpec().isEmpty(); - } - public void setChildren(Configuration hconf) throws HiveException { - - Path fpath = IOContext.get().getInputPath(); + Path fpath = IOContext.get(hconf.get(Utilities.INPUT_NAME)).getInputPath(); boolean schemeless = fpath.toUri().getScheme() == null; @@ -639,4 +635,8 @@ public class MapOperator extends Operato return null; } + @Override + public Map<Integer, DummyStoreOperator> getTagToOperatorTree() { + return MapRecordProcessor.getConnectOps(); + } } Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java Tue Oct 14 19:06:45 2014 @@ -353,6 +353,7 @@ public class MoveTask extends Task<MoveW pushFeed(FeedType.DYNAMIC_PARTITIONS, dps); } + long startTime = System.currentTimeMillis(); // load the list of DP partitions and return the list of partition specs // TODO: In a follow-up to HIVE-1361, we should refactor loadDynamicPartitions // to use Utilities.getFullDPSpecs() to get the list of full partSpecs. @@ -360,7 +361,7 @@ public class MoveTask extends Task<MoveW // iterate over it and call loadPartition() here. // The reason we don't do inside HIVE-1361 is the latter is large and we // want to isolate any potential issue it may introduce. - ArrayList<LinkedHashMap<String, String>> dp = + Map<Map<String, String>, Partition> dp = db.loadDynamicPartitions( tbd.getSourcePath(), tbd.getTable().getTableName(), @@ -370,16 +371,19 @@ public class MoveTask extends Task<MoveW tbd.getHoldDDLTime(), isSkewedStoredAsDirs(tbd), work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID); + console.printInfo("\t Time taken for load dynamic partitions : " + + (System.currentTimeMillis() - startTime)); if (dp.size() == 0 && conf.getBoolVar(HiveConf.ConfVars.HIVE_ERROR_ON_EMPTY_PARTITION)) { throw new HiveException("This query creates no partitions." 
+ " To turn off this error, set hive.error.on.empty.partition=false."); } + startTime = System.currentTimeMillis(); // for each partition spec, get the partition // and put it to WriteEntity for post-exec hook - for (LinkedHashMap<String, String> partSpec: dp) { - Partition partn = db.getPartition(table, partSpec, false); + for(Map.Entry<Map<String, String>, Partition> entry : dp.entrySet()) { + Partition partn = entry.getValue(); if (bucketCols != null || sortCols != null) { updatePartitionBucketSortColumns(table, partn, bucketCols, numBuckets, sortCols); @@ -412,8 +416,10 @@ public class MoveTask extends Task<MoveW table.getCols()); } - console.printInfo("\tLoading partition " + partSpec); + console.printInfo("\tLoading partition " + entry.getKey()); } + console.printInfo("\t Time taken for adding to write entity : " + + (System.currentTimeMillis() - startTime)); dc = null; // reset data container to prevent it being added again. } else { // static partitions List<String> partVals = MetaStoreUtils.getPvals(table.getPartCols(),

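Several hunks above (FileSinkOperator, GroupByOperator) wrap LOG.info/LOG.debug/LOG.trace calls in cached is*Enabled flags so the message string is never built on hot per-row paths when the level is disabled. Below is a minimal sketch of that guard, using the same commons-logging API the patch uses; the class name, method, and message are placeholders, not code from the patch.

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class GuardedLoggingSketch {

  private static final Log LOG = LogFactory.getLog(GuardedLoggingSketch.class);
  // Captured once at class load, as in the patch; this assumes the log level
  // does not need to be toggled at runtime.
  private static final boolean isDebugEnabled = LOG.isDebugEnabled();

  void createSlots(int numFiles) {
    if (isDebugEnabled) {
      // The concatenation below is only evaluated when DEBUG is actually enabled.
      LOG.debug("Created slots for " + numFiles);
    }
  }
}

Because the flag is a static final, a later change to the logger configuration is not picked up; the patch accepts that trade-off to keep per-row operator code paths cheap.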