Modified: hive/branches/spark/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactoryForTest.java URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactoryForTest.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactoryForTest.java (original) +++ hive/branches/spark/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactoryForTest.java Sat Sep 20 17:34:39 2014 @@ -37,7 +37,7 @@ public class SQLStdHiveAuthorizerFactory return new HiveAuthorizerImpl( privilegeManager, new SQLStdHiveAuthorizationValidatorForTest(metastoreClientFactory, conf, authenticator, - privilegeManager) + privilegeManager, ctx) ); } }
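The one-line change above threads the new HiveAuthzSessionContext argument (ctx) from the authorizer factory into the test validator. For orientation, here is a sketch of how such a context is typically built by the caller, assuming the HiveAuthzSessionContext builder API of this release line (treat the exact names as illustrative):

import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext.CLIENT_TYPE;

public class SessionContextSketch {
  // Builds the context object that createHiveAuthorizer now forwards, so the
  // validator can tell a HiveServer2 session apart from a CLI one.
  static HiveAuthzSessionContext buildContext(String sessionId) {
    HiveAuthzSessionContext.Builder ctxBuilder = new HiveAuthzSessionContext.Builder();
    ctxBuilder.setClientType(CLIENT_TYPE.HIVESERVER2); // CLIENT_TYPE.HIVECLI for the CLI path
    ctxBuilder.setSessionString(sessionId);
    return ctxBuilder.build();
  }
}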
Modified: hive/branches/spark/jdbc/pom.xml URL: http://svn.apache.org/viewvc/hive/branches/spark/jdbc/pom.xml?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/jdbc/pom.xml (original) +++ hive/branches/spark/jdbc/pom.xml Sat Sep 20 17:34:39 2014 @@ -80,6 +80,17 @@ <artifactId>libthrift</artifactId> <version>${libthrift.version}</version> </dependency> + <dependency> + <groupId>org.apache.zookeeper</groupId> + <artifactId>zookeeper</artifactId> + <version>${zookeeper.version}</version> + <exclusions> + <exclusion> + <groupId>org.jboss.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + </exclusions> + </dependency> </dependencies> <profiles> @@ -141,6 +152,16 @@ <exclude>org.apache.velocity:*</exclude> </excludes> </artifactSet> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> </configuration> </execution> </executions> Modified: hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java URL: http://svn.apache.org/viewvc/hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java (original) +++ hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java Sat Sep 20 17:34:39 2014 @@ -53,6 +53,7 @@ import javax.security.sasl.SaslException import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hive.jdbc.Utils.JdbcConnectionParams; import org.apache.hive.service.auth.HiveAuthFactory; import org.apache.hive.service.auth.KerberosSaslHelper; import org.apache.hive.service.auth.PlainSaslHelper; @@ -86,37 +87,20 @@ import org.apache.thrift.transport.TTran */ public class HiveConnection implements java.sql.Connection { public static final Log LOG = LogFactory.getLog(HiveConnection.class.getName()); - private static final String HIVE_AUTH_TYPE= "auth"; - private static final String HIVE_AUTH_QOP = "sasl.qop"; - private static final String HIVE_AUTH_SIMPLE = "noSasl"; - private static final String HIVE_AUTH_TOKEN = "delegationToken"; - private static final String HIVE_AUTH_USER = "user"; - private static final String HIVE_AUTH_PRINCIPAL = "principal"; - private static final String HIVE_AUTH_PASSWD = "password"; - private static final String HIVE_AUTH_KERBEROS_AUTH_TYPE = "kerberosAuthType"; - private static final String HIVE_AUTH_KERBEROS_AUTH_TYPE_FROM_SUBJECT = "fromSubject"; - private static final String HIVE_ANONYMOUS_USER = "anonymous"; - private static final String HIVE_ANONYMOUS_PASSWD = "anonymous"; - private static final String HIVE_USE_SSL = "ssl"; - private static final String HIVE_SSL_TRUST_STORE = "sslTrustStore"; - private static final String HIVE_SSL_TRUST_STORE_PASSWORD = "trustStorePassword"; - private static final String HIVE_SERVER2_TRANSPORT_MODE = "hive.server2.transport.mode"; - private static final String HIVE_SERVER2_THRIFT_HTTP_PATH = "hive.server2.thrift.http.path"; private static final String HIVE_VAR_PREFIX = "hivevar:"; private static final String HIVE_CONF_PREFIX = "hiveconf:"; - // Currently supports JKS keystore format - // See HIVE-6286 (Add support for PKCS12 
keystore format) - private static final String HIVE_SSL_TRUST_STORE_TYPE = "JKS"; - - private final String jdbcURI; - private final String host; - private final int port; + + private String jdbcUriString; + private String host; + private int port; private final Map<String, String> sessConfMap; private final Map<String, String> hiveConfMap; private final Map<String, String> hiveVarMap; + private JdbcConnectionParams connParams; private final boolean isEmbeddedMode; private TTransport transport; - private TCLIService.Iface client; // todo should be replaced by CliServiceClient + // TODO should be replaced by CliServiceClient + private TCLIService.Iface client; private boolean isClosed = true; private SQLWarning warningChain = null; private TSessionHandle sessHandle = null; @@ -126,14 +110,12 @@ public class HiveConnection implements j public HiveConnection(String uri, Properties info) throws SQLException { setupLoginTimeout(); - jdbcURI = uri; - // parse the connection uri - Utils.JdbcConnectionParams connParams; try { connParams = Utils.parseURL(uri); - } catch (IllegalArgumentException e) { + } catch (ZooKeeperHiveClientException e) { throw new SQLException(e); } + jdbcUriString = connParams.getJdbcUriString(); // extract parsed connection parameters: // JDBC URL: jdbc:hive2://<host>:<port>/dbName;sess_var_list?hive_conf_list#hive_var_list // each list: <key1>=<val1>;<key2>=<val2> and so on @@ -164,14 +146,14 @@ public class HiveConnection implements j } else { // extract user/password from JDBC connection properties if its not supplied in the // connection URL - if (info.containsKey(HIVE_AUTH_USER)) { - sessConfMap.put(HIVE_AUTH_USER, info.getProperty(HIVE_AUTH_USER)); - if (info.containsKey(HIVE_AUTH_PASSWD)) { - sessConfMap.put(HIVE_AUTH_PASSWD, info.getProperty(HIVE_AUTH_PASSWD)); + if (info.containsKey(JdbcConnectionParams.AUTH_USER)) { + sessConfMap.put(JdbcConnectionParams.AUTH_USER, info.getProperty(JdbcConnectionParams.AUTH_USER)); + if (info.containsKey(JdbcConnectionParams.AUTH_PASSWD)) { + sessConfMap.put(JdbcConnectionParams.AUTH_PASSWD, info.getProperty(JdbcConnectionParams.AUTH_PASSWD)); } } - if (info.containsKey(HIVE_AUTH_TYPE)) { - sessConfMap.put(HIVE_AUTH_TYPE, info.getProperty(HIVE_AUTH_TYPE)); + if (info.containsKey(JdbcConnectionParams.AUTH_TYPE)) { + sessConfMap.put(JdbcConnectionParams.AUTH_TYPE, info.getProperty(JdbcConnectionParams.AUTH_TYPE)); } // open the client transport openTransport(); @@ -189,19 +171,44 @@ public class HiveConnection implements j supportedProtocols.add(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V7); // open client session - openSession(connParams); + openSession(); } private void openTransport() throws SQLException { - // TODO: Refactor transport creation to a factory, it's getting uber messy here - transport = isHttpTransportMode() ? createHttpTransport() : createBinaryTransport(); - try { - if (!transport.isOpen()) { - transport.open(); + while (true) { + try { + transport = isHttpTransportMode() ? 
createHttpTransport() : createBinaryTransport(); + if (!transport.isOpen()) { + LOG.info("Will try to open client transport with JDBC Uri: " + jdbcUriString); + transport.open(); + } + break; + } catch (TTransportException e) { + LOG.info("Could not open client transport with JDBC Uri: " + jdbcUriString); + // We'll retry till we exhaust all HiveServer2 uris from ZooKeeper + if ((sessConfMap.get(JdbcConnectionParams.SERVICE_DISCOVERY_MODE) != null) + && (JdbcConnectionParams.SERVICE_DISCOVERY_MODE_ZOOKEEPER.equalsIgnoreCase(sessConfMap + .get(JdbcConnectionParams.SERVICE_DISCOVERY_MODE)))) { + try { + // Update jdbcUriString, host & port variables in connParams + // Throw an exception if all HiveServer2 uris have been exhausted, + // or if we're unable to connect to ZooKeeper. + Utils.updateConnParamsFromZooKeeper(connParams); + } catch (ZooKeeperHiveClientException ze) { + throw new SQLException( + "Could not open client transport for any of the Server URI's in ZooKeeper: " + + ze.getMessage(), " 08S01", ze); + } + // Update with new values + jdbcUriString = connParams.getJdbcUriString(); + host = connParams.getHost(); + port = connParams.getPort(); + LOG.info("Will retry opening client transport"); + } else { + throw new SQLException("Could not open client transport with JDBC Uri: " + jdbcUriString + + ": " + e.getMessage(), " 08S01", e); + } } - } catch (TTransportException e) { - throw new SQLException("Could not open connection to " - + jdbcURI + ": " + e.getMessage(), " 08S01", e); } } @@ -211,37 +218,36 @@ public class HiveConnection implements j String schemeName = useSsl ? "https" : "http"; // http path should begin with "/" String httpPath; - httpPath = hiveConfMap.get(HIVE_SERVER2_THRIFT_HTTP_PATH); - if(httpPath == null) { + httpPath = hiveConfMap.get(JdbcConnectionParams.HTTP_PATH); + if (httpPath == null) { httpPath = "/"; - } - else if(!httpPath.startsWith("/")) { + } else if (!httpPath.startsWith("/")) { httpPath = "/" + httpPath; } - return schemeName + "://" + host + ":" + port + httpPath; + return schemeName + "://" + host + ":" + port + httpPath; } - private TTransport createHttpTransport() throws SQLException { + private TTransport createHttpTransport() throws SQLException, TTransportException { DefaultHttpClient httpClient; - boolean useSsl = isSslConnection(); - // Create an http client from the configs - try { - httpClient = getHttpClient(useSsl); - } catch (Exception e) { - String msg = "Could not create http connection to " + - jdbcURI + ". " + e.getMessage(); - throw new SQLException(msg, " 08S01", e); - } - + httpClient = getHttpClient(useSsl); try { transport = new THttpClient(getServerHttpUrl(useSsl), httpClient); + // We'll call an open/close here to send a test HTTP message to the server. Any + // TTransportException caused by trying to connect to a non-available peer are thrown here. + // Bubbling them up the call hierarchy so that a retry can happen in openTransport, + // if dynamic service discovery is configured. + TCLIService.Iface client = new TCLIService.Client(new TBinaryProtocol(transport)); + TOpenSessionResp openResp = client.OpenSession(new TOpenSessionReq()); + if (openResp != null) { + client.CloseSession(new TCloseSessionReq(openResp.getSessionHandle())); + } } - catch (TTransportException e) { + catch (TException e) { String msg = "Could not create http connection to " + - jdbcURI + ". " + e.getMessage(); - throw new SQLException(msg, " 08S01", e); + jdbcUriString + ". 
" + e.getMessage(); + throw new TTransportException(msg, e); } return transport; } @@ -263,7 +269,7 @@ public class HiveConnection implements j * for sending to the server before every request. */ requestInterceptor = new HttpKerberosRequestInterceptor( - sessConfMap.get(HIVE_AUTH_PRINCIPAL), host, getServerHttpUrl(false)); + sessConfMap.get(JdbcConnectionParams.AUTH_PRINCIPAL), host, getServerHttpUrl(false)); } else { /** @@ -273,11 +279,23 @@ public class HiveConnection implements j requestInterceptor = new HttpBasicAuthInterceptor(getUserName(), getPassword()); // Configure httpClient for SSL if (useSsl) { - String sslTrustStorePath = sessConfMap.get(HIVE_SSL_TRUST_STORE); + String sslTrustStorePath = sessConfMap.get(JdbcConnectionParams.SSL_TRUST_STORE); String sslTrustStorePassword = sessConfMap.get( - HIVE_SSL_TRUST_STORE_PASSWORD); + JdbcConnectionParams.SSL_TRUST_STORE_PASSWORD); KeyStore sslTrustStore; SSLSocketFactory socketFactory; + /** + * The code within the try block throws: + * 1. SSLInitializationException + * 2. KeyStoreException + * 3. IOException + * 4. NoSuchAlgorithmException + * 5. CertificateException + * 6. KeyManagementException + * 7. UnrecoverableKeyException + * We don't want the client to retry on any of these, hence we catch all + * and throw a SQLException. + */ try { if (sslTrustStorePath == null || sslTrustStorePath.isEmpty()) { // Create a default socket factory based on standard JSSE trust material @@ -285,7 +303,7 @@ public class HiveConnection implements j } else { // Pick trust store config from the given path - sslTrustStore = KeyStore.getInstance(HIVE_SSL_TRUST_STORE_TYPE); + sslTrustStore = KeyStore.getInstance(JdbcConnectionParams.SSL_TRUST_STORE_TYPE); sslTrustStore.load(new FileInputStream(sslTrustStorePath), sslTrustStorePassword.toCharArray()); socketFactory = new SSLSocketFactory(sslTrustStore); @@ -296,7 +314,7 @@ public class HiveConnection implements j } catch (Exception e) { String msg = "Could not create an https connection to " + - jdbcURI + ". " + e.getMessage(); + jdbcUriString + ". " + e.getMessage(); throw new SQLException(msg, " 08S01", e); } } @@ -316,29 +334,32 @@ public class HiveConnection implements j * - Raw (non-SASL) socket * * Kerberos and Delegation token supports SASL QOP configurations + * @throws SQLException, TTransportException */ - private TTransport createBinaryTransport() throws SQLException { + private TTransport createBinaryTransport() throws SQLException, TTransportException { try { // handle secure connection if specified - if (!HIVE_AUTH_SIMPLE.equals(sessConfMap.get(HIVE_AUTH_TYPE))) { + if (!JdbcConnectionParams.AUTH_SIMPLE.equals(sessConfMap.get(JdbcConnectionParams.AUTH_TYPE))) { // If Kerberos Map<String, String> saslProps = new HashMap<String, String>(); SaslQOP saslQOP = SaslQOP.AUTH; - if (sessConfMap.containsKey(HIVE_AUTH_PRINCIPAL)) { - if (sessConfMap.containsKey(HIVE_AUTH_QOP)) { + if (sessConfMap.containsKey(JdbcConnectionParams.AUTH_PRINCIPAL)) { + if (sessConfMap.containsKey(JdbcConnectionParams.AUTH_QOP)) { try { - saslQOP = SaslQOP.fromString(sessConfMap.get(HIVE_AUTH_QOP)); + saslQOP = SaslQOP.fromString(sessConfMap.get(JdbcConnectionParams.AUTH_QOP)); } catch (IllegalArgumentException e) { - throw new SQLException("Invalid " + HIVE_AUTH_QOP + + throw new SQLException("Invalid " + JdbcConnectionParams.AUTH_QOP + " parameter. 
" + e.getMessage(), "42000", e); } } saslProps.put(Sasl.QOP, saslQOP.toString()); saslProps.put(Sasl.SERVER_AUTH, "true"); - boolean assumeSubject = HIVE_AUTH_KERBEROS_AUTH_TYPE_FROM_SUBJECT.equals(sessConfMap.get(HIVE_AUTH_KERBEROS_AUTH_TYPE)); + boolean assumeSubject = JdbcConnectionParams.AUTH_KERBEROS_AUTH_TYPE_FROM_SUBJECT.equals(sessConfMap + .get(JdbcConnectionParams.AUTH_KERBEROS_AUTH_TYPE)); transport = KerberosSaslHelper.getKerberosTransport( - sessConfMap.get(HIVE_AUTH_PRINCIPAL), host, - HiveAuthFactory.getSocketTransport(host, port, loginTimeout), saslProps, assumeSubject); + sessConfMap.get(JdbcConnectionParams.AUTH_PRINCIPAL), host, + HiveAuthFactory.getSocketTransport(host, port, loginTimeout), saslProps, + assumeSubject); } else { // If there's a delegation token available then use token based connection String tokenStr = getClientDelegationToken(sessConfMap); @@ -349,10 +370,15 @@ public class HiveConnection implements j // we are using PLAIN Sasl connection with user/password String userName = getUserName(); String passwd = getPassword(); + // Note: Thrift returns an SSL socket that is already bound to the specified host:port + // Therefore an open called on this would be a no-op later + // Hence, any TTransportException related to connecting with the peer are thrown here. + // Bubbling them up the call hierarchy so that a retry can happen in openTransport, + // if dynamic service discovery is configured. if (isSslConnection()) { // get SSL socket - String sslTrustStore = sessConfMap.get(HIVE_SSL_TRUST_STORE); - String sslTrustStorePassword = sessConfMap.get(HIVE_SSL_TRUST_STORE_PASSWORD); + String sslTrustStore = sessConfMap.get(JdbcConnectionParams.SSL_TRUST_STORE); + String sslTrustStorePassword = sessConfMap.get(JdbcConnectionParams.SSL_TRUST_STORE_PASSWORD); if (sslTrustStore == null || sslTrustStore.isEmpty()) { transport = HiveAuthFactory.getSSLSocket(host, port, loginTimeout); } else { @@ -373,10 +399,7 @@ public class HiveConnection implements j } } catch (SaslException e) { throw new SQLException("Could not create secure connection to " - + jdbcURI + ": " + e.getMessage(), " 08S01", e); - } catch (TTransportException e) { - throw new SQLException("Could not create connection to " - + jdbcURI + ": " + e.getMessage(), " 08S01", e); + + jdbcUriString + ": " + e.getMessage(), " 08S01", e); } return transport; } @@ -385,7 +408,7 @@ public class HiveConnection implements j private String getClientDelegationToken(Map<String, String> jdbcConnConf) throws SQLException { String tokenStr = null; - if (HIVE_AUTH_TOKEN.equalsIgnoreCase(jdbcConnConf.get(HIVE_AUTH_TYPE))) { + if (JdbcConnectionParams.AUTH_TOKEN.equalsIgnoreCase(jdbcConnConf.get(JdbcConnectionParams.AUTH_TYPE))) { // check delegation token in job conf if any try { tokenStr = ShimLoader.getHadoopShims(). 
@@ -397,7 +420,7 @@ public class HiveConnection implements j return tokenStr; } - private void openSession(Utils.JdbcConnectionParams connParams) throws SQLException { + private void openSession() throws SQLException { TOpenSessionReq openReq = new TOpenSessionReq(); Map<String, String> openConf = new HashMap<String, String>(); @@ -433,7 +456,7 @@ public class HiveConnection implements j } catch (TException e) { LOG.error("Error opening session", e); throw new SQLException("Could not establish connection to " - + jdbcURI + ": " + e.getMessage(), " 08S01", e); + + jdbcUriString + ": " + e.getMessage(), " 08S01", e); } isClosed = false; } @@ -442,27 +465,27 @@ public class HiveConnection implements j * @return username from sessConfMap */ private String getUserName() { - return getSessionValue(HIVE_AUTH_USER, HIVE_ANONYMOUS_USER); + return getSessionValue(JdbcConnectionParams.AUTH_USER, JdbcConnectionParams.ANONYMOUS_USER); } /** * @return password from sessConfMap */ private String getPassword() { - return getSessionValue(HIVE_AUTH_PASSWD, HIVE_ANONYMOUS_PASSWD); + return getSessionValue(JdbcConnectionParams.AUTH_PASSWD, JdbcConnectionParams.ANONYMOUS_PASSWD); } private boolean isSslConnection() { - return "true".equalsIgnoreCase(sessConfMap.get(HIVE_USE_SSL)); + return "true".equalsIgnoreCase(sessConfMap.get(JdbcConnectionParams.USE_SSL)); } private boolean isKerberosAuthMode() { - return !HIVE_AUTH_SIMPLE.equals(sessConfMap.get(HIVE_AUTH_TYPE)) - && sessConfMap.containsKey(HIVE_AUTH_PRINCIPAL); + return !JdbcConnectionParams.AUTH_SIMPLE.equals(sessConfMap.get(JdbcConnectionParams.AUTH_TYPE)) + && sessConfMap.containsKey(JdbcConnectionParams.AUTH_PRINCIPAL); } private boolean isHttpTransportMode() { - String transportMode = hiveConfMap.get(HIVE_SERVER2_TRANSPORT_MODE); + String transportMode = hiveConfMap.get(JdbcConnectionParams.TRANSPORT_MODE); if(transportMode != null && (transportMode.equalsIgnoreCase("http"))) { return true; } Modified: hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveDriver.java URL: http://svn.apache.org/viewvc/hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveDriver.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveDriver.java (original) +++ hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveDriver.java Sat Sep 20 17:34:39 2014 @@ -230,7 +230,12 @@ public class HiveDriver implements Drive throw new SQLException("Invalid connection url: " + url); } - JdbcConnectionParams params = Utils.parseURL(url); + JdbcConnectionParams params = null; + try { + params = Utils.parseURL(url); + } catch (ZooKeeperHiveClientException e) { + throw new SQLException(e); + } String host = params.getHost(); if (host == null){ host = ""; @@ -239,7 +244,7 @@ public class HiveDriver implements Drive if(host.equals("")){ port = ""; } - else if(port.equals("0")){ + else if(port.equals("0") || port.equals("-1")){ port = Utils.DEFAULT_PORT; } String db = params.getDbName(); Modified: hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/Utils.java URL: http://svn.apache.org/viewvc/hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/Utils.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/Utils.java (original) +++ hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/Utils.java Sat Sep 20 
17:34:39 2014 @@ -19,17 +19,23 @@ package org.apache.hive.jdbc; import java.net.URI; +import java.net.URISyntaxException; import java.sql.SQLException; +import java.util.ArrayList; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hive.service.cli.HiveSQLException; import org.apache.hive.service.cli.thrift.TStatus; import org.apache.hive.service.cli.thrift.TStatusCode; public class Utils { + public static final Log LOG = LogFactory.getLog(Utils.class.getName()); /** * The required prefix for the connection URL. */ @@ -47,14 +53,58 @@ public class Utils { private static final String URI_JDBC_PREFIX = "jdbc:"; + private static final String URI_HIVE_PREFIX = "hive2:"; + public static class JdbcConnectionParams { + // Note on client side parameter naming convention: + // Prefer using a shorter camelCase param name instead of using the same name as the + // corresponding + // HiveServer2 config. + // For a jdbc url: jdbc:hive2://<host>:<port>/dbName;sess_var_list?hive_conf_list#hive_var_list, + // client side params are specified in sess_var_list + + // Client param names: + static final String AUTH_TYPE = "auth"; + static final String AUTH_QOP = "sasl.qop"; + static final String AUTH_SIMPLE = "noSasl"; + static final String AUTH_TOKEN = "delegationToken"; + static final String AUTH_USER = "user"; + static final String AUTH_PRINCIPAL = "principal"; + static final String AUTH_PASSWD = "password"; + static final String AUTH_KERBEROS_AUTH_TYPE = "kerberosAuthType"; + static final String AUTH_KERBEROS_AUTH_TYPE_FROM_SUBJECT = "fromSubject"; + static final String ANONYMOUS_USER = "anonymous"; + static final String ANONYMOUS_PASSWD = "anonymous"; + static final String USE_SSL = "ssl"; + static final String SSL_TRUST_STORE = "sslTrustStore"; + static final String SSL_TRUST_STORE_PASSWORD = "trustStorePassword"; + static final String TRANSPORT_MODE = "hive.server2.transport.mode"; + static final String HTTP_PATH = "hive.server2.thrift.http.path"; + static final String SERVICE_DISCOVERY_MODE = "serviceDiscoveryMode"; + // Don't use dynamic service discovery + static final String SERVICE_DISCOVERY_MODE_NONE = "none"; + // Use ZooKeeper for indirection while using dynamic service discovery + static final String SERVICE_DISCOVERY_MODE_ZOOKEEPER = "zooKeeper"; + static final String ZOOKEEPER_NAMESPACE = "zooKeeperNamespace"; + + // Non-configurable params: + // ZOOKEEPER_SESSION_TIMEOUT is not exposed as client configurable + static final int ZOOKEEPER_SESSION_TIMEOUT = 600 * 1000; + // Currently supports JKS keystore format + static final String SSL_TRUST_STORE_TYPE = "JKS"; + private String host = null; private int port; + private String jdbcUriString; private String dbName = DEFAULT_DATABASE; private Map<String,String> hiveConfs = new LinkedHashMap<String,String>(); private Map<String,String> hiveVars = new LinkedHashMap<String,String>(); private Map<String,String> sessionVars = new LinkedHashMap<String,String>(); private boolean isEmbeddedMode = false; + private String[] authorityList; + private String zooKeeperEnsemble = null; + private String currentHostZnodePath; + private List<String> rejectedHostZnodePaths = new ArrayList<String>(); public JdbcConnectionParams() { } @@ -62,46 +112,94 @@ public class Utils { public String getHost() { return host; } + public int getPort() { return port; } + + public String
getJdbcUriString() { + return jdbcUriString; + } + public String getDbName() { return dbName; } + public Map<String, String> getHiveConfs() { return hiveConfs; } - public Map<String,String> getHiveVars() { + + public Map<String, String> getHiveVars() { return hiveVars; } + public boolean isEmbeddedMode() { return isEmbeddedMode; } + public Map<String, String> getSessionVars() { return sessionVars; } + public String[] getAuthorityList() { + return authorityList; + } + + public String getZooKeeperEnsemble() { + return zooKeeperEnsemble; + } + + public List<String> getRejectedHostZnodePaths() { + return rejectedHostZnodePaths; + } + + public String getCurrentHostZnodePath() { + return currentHostZnodePath; + } + public void setHost(String host) { this.host = host; } + public void setPort(int port) { this.port = port; } + + public void setJdbcUriString(String jdbcUriString) { + this.jdbcUriString = jdbcUriString; + } + public void setDbName(String dbName) { this.dbName = dbName; } + public void setHiveConfs(Map<String, String> hiveConfs) { this.hiveConfs = hiveConfs; } - public void setHiveVars(Map<String,String> hiveVars) { + + public void setHiveVars(Map<String, String> hiveVars) { this.hiveVars = hiveVars; } + public void setEmbeddedMode(boolean embeddedMode) { this.isEmbeddedMode = embeddedMode; } + public void setSessionVars(Map<String, String> sessionVars) { this.sessionVars = sessionVars; } + + public void setSuppliedAuthorityList(String[] authorityList) { + this.authorityList = authorityList; + } + + public void setZooKeeperEnsemble(String zooKeeperEnsemble) { + this.zooKeeperEnsemble = zooKeeperEnsemble; + } + + public void setCurrentHostZnodePath(String currentHostZnodePath) { + this.currentHostZnodePath = currentHostZnodePath; + } } // Verify success or success_with_info status, else throw SQLException @@ -124,27 +222,33 @@ public class Utils { /** * Parse JDBC connection URL - The new format of the URL is jdbc:hive2://<host>:<port>/dbName;sess_var_list?hive_conf_list#hive_var_list - * where the optional sess, conf and var lists are semicolon separated <key>=<val> pairs. As before, if the - * host/port is not specified, the driver runs an embedded hive. + * The new format of the URL is: + * jdbc:hive2://<host1>:<port1>,<host2>:<port2>/dbName;sess_var_list?hive_conf_list#hive_var_list + * where the optional sess, conf and var lists are semicolon separated <key>=<val> pairs. + * For utilizing dynamic service discovery with HiveServer2 multiple comma separated host:port pairs can + * be specified as shown above. + * The JDBC driver resolves the list of uris and picks a specific server instance to connect to. + * Currently, dynamic service discovery using ZooKeeper is supported, in which case the host:port pairs represent a ZooKeeper ensemble. + + * As before, if the host/port is not specified, the driver runs an embedded hive. * examples - * jdbc:hive2://ubuntu:11000/db2?hive.cli.conf.printheader=true;hive.exec.mode.local.auto.inputbytes.max=9999#stab=salesTable;icol=customerID * jdbc:hive2://?hive.cli.conf.printheader=true;hive.exec.mode.local.auto.inputbytes.max=9999#stab=salesTable;icol=customerID * jdbc:hive2://ubuntu:11000/db2;user=foo;password=bar * * Connect to http://server:10001/hs2, with specified basicAuth credentials and initial database: - * jdbc:hive2://server:10001/db;user=foo;password=bar?hive.server2.transport.mode=http;hive.server2.thrift.http.path=hs2 - * - * Note that currently the session properties are not used.
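The new multi-authority URL format documented in this hunk can be exercised as follows. The ensemble addresses and credentials below are placeholders, while serviceDiscoveryMode and zooKeeperNamespace are the client parameter names defined in JdbcConnectionParams above:

import java.sql.Connection;
import java.sql.DriverManager;

public class DiscoveryUrlExample {
  public static void main(String[] args) throws Exception {
    // The comma separated authorities name a ZooKeeper ensemble, not
    // HiveServer2 hosts; parseURL resolves them to one registered instance.
    String url = "jdbc:hive2://zk1:2181,zk2:2181,zk3:2181/default;"
        + "serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2";
    try (Connection conn = DriverManager.getConnection(url, "foo", "bar")) {
      // If the chosen instance is unreachable, openTransport (above) retries
      // against the remaining instances registered in ZooKeeper.
      System.out.println("Connected via " + url);
    }
  }
}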
+ * jdbc:hive2://server:10001/db;user=foo;password=bar?hive.server2.transport.mode=http;hive.server2.thrift.http.path=hs2 * * @param uri * @return + * @throws SQLException */ - public static JdbcConnectionParams parseURL(String uri) throws IllegalArgumentException { + public static JdbcConnectionParams parseURL(String uri) throws JdbcUriParseException, + SQLException, ZooKeeperHiveClientException { JdbcConnectionParams connParams = new JdbcConnectionParams(); if (!uri.startsWith(URL_PREFIX)) { - throw new IllegalArgumentException("Bad URL format: Missing prefix " + URL_PREFIX); + throw new JdbcUriParseException("Bad URL format: Missing prefix " + URL_PREFIX); } // For URLs with no other configuration @@ -154,29 +258,28 @@ public class Utils { return connParams; } - URI jdbcURI = URI.create(uri.substring(URI_JDBC_PREFIX.length())); - - // Check to prevent unintentional use of embedded mode. A missing "/" - // to separate the 'path' portion of URI can result in this. - // The missing "/" common typo while using secure mode, eg of such url - - // jdbc:hive2://localhost:10000;principal=hive/[email protected] - if((jdbcURI.getAuthority() != null) && (jdbcURI.getHost()==null)) { - throw new IllegalArgumentException("Bad URL format. Hostname not found " - + " in authority part of the url: " + jdbcURI.getAuthority() - + ". Are you missing a '/' after the hostname ?"); - } - - connParams.setHost(jdbcURI.getHost()); - if (connParams.getHost() == null) { + // The JDBC URI now supports specifying multiple host:port if dynamic service discovery is + // configured on HiveServer2 (like: host1:port1,host2:port2,host3:port3) + // We'll extract the authorities (host:port combo) from the URI, extract session vars, hive + // confs & hive vars by parsing it as a Java URI. + // To parse the intermediate URI as a Java URI, we'll give a dummy authority(dummy:00000). + // Later, we'll substitute the dummy authority for a resolved authority. 
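The dummy-authority substitution described in the comment above can be seen in isolation in this sketch: java.net.URI cannot decompose a comma separated authority into host and port, so a single well-formed placeholder is parsed instead. Host names and the placeholder value are illustrative only:

import java.net.URI;

public class DummyAuthoritySketch {
  public static void main(String[] args) {
    String raw = "hive2://host1:10001,host2:10001/db;user=foo?k1=v1#k2=v2";
    int from = "hive2://".length();
    // Everything between the scheme and the first "/" is the authority list.
    String authorities = raw.substring(from, raw.indexOf('/', from));
    URI parsed = URI.create(raw.replace(authorities, "dummyhost:00000"));
    System.out.println(authorities);          // host1:10001,host2:10001
    System.out.println(parsed.getPath());     // /db;user=foo
    System.out.println(parsed.getQuery());    // k1=v1
    System.out.println(parsed.getFragment()); // k2=v2
  }
}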
+ String dummyAuthorityString = "dummyhost:00000"; + String suppliedAuthorities = getAuthorities(uri, connParams); + if ((suppliedAuthorities == null) || (suppliedAuthorities.isEmpty())) { + // Given uri of the form: + // jdbc:hive2:///dbName;sess_var_list?hive_conf_list#hive_var_list connParams.setEmbeddedMode(true); } else { - int port = jdbcURI.getPort(); - if (port == -1) { - port = Integer.valueOf(DEFAULT_PORT); - } - connParams.setPort(port); + LOG.info("Supplied authorities: " + suppliedAuthorities); + String[] authorityList = suppliedAuthorities.split(","); + connParams.setSuppliedAuthorityList(authorityList); + uri = uri.replace(suppliedAuthorities, dummyAuthorityString); } + // Now parse the connection uri with dummy authority + URI jdbcURI = URI.create(uri.substring(URI_JDBC_PREFIX.length())); + // key=value pattern Pattern pattern = Pattern.compile("([^;]*)=([^;]*)[;]?"); @@ -192,12 +295,13 @@ public class Utils { } else { // we have dbname followed by session parameters dbName = sessVars.substring(0, sessVars.indexOf(';')); - sessVars = sessVars.substring(sessVars.indexOf(';')+1); + sessVars = sessVars.substring(sessVars.indexOf(';') + 1); if (sessVars != null) { Matcher sessMatcher = pattern.matcher(sessVars); while (sessMatcher.find()) { if (connParams.getSessionVars().put(sessMatcher.group(1), sessMatcher.group(2)) != null) { - throw new IllegalArgumentException("Bad URL format: Multiple values for property " + sessMatcher.group(1)); + throw new JdbcUriParseException("Bad URL format: Multiple values for property " + + sessMatcher.group(1)); } } } @@ -225,10 +329,146 @@ public class Utils { } } + // Extract host, port + if (connParams.isEmbeddedMode()) { + // In case of embedded mode we were supplied with an empty authority. + // So we never substituted the authority with a dummy one. + connParams.setHost(jdbcURI.getHost()); + connParams.setPort(jdbcURI.getPort()); + } else { + // Else substitute the dummy authority with a resolved one. + // In case of dynamic service discovery using ZooKeeper, it picks a server uri from ZooKeeper + String resolvedAuthorityString = resolveAuthority(connParams); + uri = uri.replace(dummyAuthorityString, resolvedAuthorityString); + connParams.setJdbcUriString(uri); + // Create a Java URI from the resolved URI for extracting the host/port + URI resolvedAuthorityURI = null; + try { + resolvedAuthorityURI = new URI(null, resolvedAuthorityString, null, null, null); + } catch (URISyntaxException e) { + throw new JdbcUriParseException("Bad URL format: ", e); + } + connParams.setHost(resolvedAuthorityURI.getHost()); + connParams.setPort(resolvedAuthorityURI.getPort()); + } + return connParams; } /** + * Get the authority string from the supplied uri, which could potentially contain multiple + * host:port pairs. 
+ * + * @param uri + * @param connParams + * @return + * @throws JdbcUriParseException + */ + private static String getAuthorities(String uri, JdbcConnectionParams connParams) + throws JdbcUriParseException { + String authorities; + // For a jdbc uri like: jdbc:hive2://host1:port1,host2:port2,host3:port3/ + // Extract the uri host:port list starting after "jdbc:hive2://", till the 1st "/" or EOL + int fromIndex = Utils.URL_PREFIX.length(); + int toIndex = uri.indexOf("/", fromIndex); + if (toIndex < 0) { + authorities = uri.substring(fromIndex); + } else { + authorities = uri.substring(fromIndex, uri.indexOf("/", fromIndex)); + } + return authorities; + } + + /** + * Get a string representing a specific host:port + * @param connParams + * @return + * @throws JdbcUriParseException + * @throws ZooKeeperHiveClientException + */ + private static String resolveAuthority(JdbcConnectionParams connParams) + throws JdbcUriParseException, ZooKeeperHiveClientException { + String serviceDiscoveryMode = + connParams.getSessionVars().get(JdbcConnectionParams.SERVICE_DISCOVERY_MODE); + if ((serviceDiscoveryMode != null) + && (JdbcConnectionParams.SERVICE_DISCOVERY_MODE_ZOOKEEPER + .equalsIgnoreCase(serviceDiscoveryMode))) { + // Resolve using ZooKeeper + return resolveAuthorityUsingZooKeeper(connParams); + } else { + String authority = connParams.getAuthorityList()[0]; + URI jdbcURI = URI.create(URI_HIVE_PREFIX + "//" + authority); + // Check to prevent unintentional use of embedded mode. A missing "/" + // to separate the 'path' portion of URI can result in this. + // The missing "/" common typo while using secure mode, eg of such url - + // jdbc:hive2://localhost:10000;principal=hive/[email protected] + if ((jdbcURI.getAuthority() != null) && (jdbcURI.getHost() == null)) { + throw new JdbcUriParseException("Bad URL format. Hostname not found " + + " in authority part of the url: " + jdbcURI.getAuthority() + + ". Are you missing a '/' after the hostname ?"); + } + // Return the 1st element of the array + return jdbcURI.getAuthority(); + } + } + + /** + * Read a specific host:port from ZooKeeper + * @param connParams + * @return + * @throws ZooKeeperHiveClientException + */ + private static String resolveAuthorityUsingZooKeeper(JdbcConnectionParams connParams) + throws ZooKeeperHiveClientException { + // Set ZooKeeper ensemble in connParams for later use + connParams.setZooKeeperEnsemble(joinStringArray(connParams.getAuthorityList(), ",")); + return ZooKeeperHiveClientHelper.getNextServerUriFromZooKeeper(connParams); + } + + /** + * Read the next server coordinates (host:port combo) from ZooKeeper. Ignore the znodes already + * explored. Also update the host, port, jdbcUriString fields of connParams. 
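ZooKeeperHiveClientHelper.getNextServerUriFromZooKeeper is referenced throughout this file but is not part of the quoted diff. Assuming each HiveServer2 instance registers a znode under the configured namespace whose data is its host:port (the znode layout, naming, and random selection below are assumptions, not the helper's actual code), its job plausibly reduces to:

import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import org.apache.zookeeper.ZooKeeper;

public class ZooKeeperPickSketch {
  static String nextServerUri(ZooKeeper zk, String namespace,
      List<String> rejectedZnodes) throws Exception {
    // List candidate servers and drop the ones already tried and rejected.
    List<String> candidates =
        new ArrayList<String>(zk.getChildren("/" + namespace, false));
    candidates.removeAll(rejectedZnodes);
    if (candidates.isEmpty()) {
      // Matches the "all HiveServer2 uris have been exhausted" failure above.
      throw new Exception("Tried all existing HiveServer2 uris from ZooKeeper");
    }
    // Pick one live instance; its znode data carries the server's host:port.
    String znode = candidates.get(new Random().nextInt(candidates.size()));
    byte[] data = zk.getData("/" + namespace + "/" + znode, false, null);
    return new String(data, "UTF-8");
  }
}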
+ * + * @param connParams + * @throws ZooKeeperHiveClientException + */ + static void updateConnParamsFromZooKeeper(JdbcConnectionParams connParams) + throws ZooKeeperHiveClientException { + // Add current host to the rejected list + connParams.getRejectedHostZnodePaths().add(connParams.getCurrentHostZnodePath()); + // Get another HiveServer2 uri from ZooKeeper + String serverUriString = ZooKeeperHiveClientHelper.getNextServerUriFromZooKeeper(connParams); + // Parse serverUri to a java URI and extract host, port + URI serverUri = null; + try { + // Note URL_PREFIX is not a valid scheme format, therefore leaving it null in the constructor + // to construct a valid URI + serverUri = new URI(null, serverUriString, null, null, null); + } catch (URISyntaxException e) { + throw new ZooKeeperHiveClientException(e); + } + String oldServerHost = connParams.getHost(); + int oldServerPort = connParams.getPort(); + String newServerHost = serverUri.getHost(); + int newServerPort = serverUri.getPort(); + connParams.setHost(newServerHost); + connParams.setPort(newServerPort); + connParams.setJdbcUriString(connParams.getJdbcUriString().replace( + oldServerHost + ":" + oldServerPort, newServerHost + ":" + newServerPort)); + } + + private static String joinStringArray(String[] stringArray, String separator) { + StringBuilder stringBuilder = new StringBuilder(); + for (int cur = 0, end = stringArray.length; cur < end; cur++) { + if (cur > 0) { + stringBuilder.append(separator); + } + stringBuilder.append(stringArray[cur]); + } + return stringBuilder.toString(); + } + + /** * Takes a version string delimited by '.' and '-' characters * and returns a partial version. * Modified: hive/branches/spark/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/FieldSchema.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/FieldSchema.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/FieldSchema.java (original) +++ hive/branches/spark/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/FieldSchema.java Sat Sep 20 17:34:39 2014 @@ -135,9 +135,9 @@ public class FieldSchema implements org. String comment) { this(); - this.name = name; - this.type = type; - this.comment = comment; + this.name = org.apache.hive.common.util.HiveStringUtils.intern(name); + this.type = org.apache.hive.common.util.HiveStringUtils.intern(type); + this.comment = org.apache.hive.common.util.HiveStringUtils.intern(comment); } /** @@ -145,13 +145,13 @@ public class FieldSchema implements org. */ public FieldSchema(FieldSchema other) { if (other.isSetName()) { - this.name = other.name; + this.name = org.apache.hive.common.util.HiveStringUtils.intern(other.name); } if (other.isSetType()) { - this.type = other.type; + this.type = org.apache.hive.common.util.HiveStringUtils.intern(other.type); } if (other.isSetComment()) { - this.comment = other.comment; + this.comment = org.apache.hive.common.util.HiveStringUtils.intern(other.comment); } } @@ -171,7 +171,7 @@ public class FieldSchema implements org. } public void setName(String name) { - this.name = name; + this.name = org.apache.hive.common.util.HiveStringUtils.intern(name); } public void unsetName() { @@ -194,7 +194,7
} public void setType(String type) { - this.type = type; + this.type = org.apache.hive.common.util.HiveStringUtils.intern(type); } public void unsetType() { @@ -217,7 +217,7 @@ public class FieldSchema implements org. } public void setComment(String comment) { - this.comment = comment; + this.comment = org.apache.hive.common.util.HiveStringUtils.intern(comment); } public void unsetComment() { Modified: hive/branches/spark/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java (original) +++ hive/branches/spark/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java Sat Sep 20 17:34:39 2014 @@ -182,14 +182,14 @@ public class Partition implements org.ap { this(); this.values = values; - this.dbName = dbName; - this.tableName = tableName; + this.dbName = org.apache.hive.common.util.HiveStringUtils.intern(dbName); + this.tableName = org.apache.hive.common.util.HiveStringUtils.intern(tableName); this.createTime = createTime; setCreateTimeIsSet(true); this.lastAccessTime = lastAccessTime; setLastAccessTimeIsSet(true); this.sd = sd; - this.parameters = parameters; + this.parameters = org.apache.hive.common.util.HiveStringUtils.intern(parameters); } /** @@ -205,10 +205,10 @@ public class Partition implements org.ap this.values = __this__values; } if (other.isSetDbName()) { - this.dbName = other.dbName; + this.dbName = org.apache.hive.common.util.HiveStringUtils.intern(other.dbName); } if (other.isSetTableName()) { - this.tableName = other.tableName; + this.tableName = org.apache.hive.common.util.HiveStringUtils.intern(other.tableName); } this.createTime = other.createTime; this.lastAccessTime = other.lastAccessTime; @@ -222,9 +222,9 @@ public class Partition implements org.ap String other_element_key = other_element.getKey(); String other_element_value = other_element.getValue(); - String __this__parameters_copy_key = other_element_key; + String __this__parameters_copy_key = org.apache.hive.common.util.HiveStringUtils.intern(other_element_key); - String __this__parameters_copy_value = other_element_value; + String __this__parameters_copy_value = org.apache.hive.common.util.HiveStringUtils.intern(other_element_value); __this__parameters.put(__this__parameters_copy_key, __this__parameters_copy_value); } @@ -296,7 +296,7 @@ public class Partition implements org.ap } public void setDbName(String dbName) { - this.dbName = dbName; + this.dbName = org.apache.hive.common.util.HiveStringUtils.intern(dbName); } public void unsetDbName() { @@ -319,7 +319,7 @@ public class Partition implements org.ap } public void setTableName(String tableName) { - this.tableName = tableName; + this.tableName = org.apache.hive.common.util.HiveStringUtils.intern(tableName); } public void unsetTableName() { @@ -420,7 +420,7 @@ public class Partition implements org.ap } public void setParameters(Map<String,String> parameters) { - this.parameters = parameters; + this.parameters = org.apache.hive.common.util.HiveStringUtils.intern(parameters); } public void unsetParameters() { Modified: 
hive/branches/spark/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java (original) +++ hive/branches/spark/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java Sat Sep 20 17:34:39 2014 @@ -137,9 +137,9 @@ public class SerDeInfo implements org.ap Map<String,String> parameters) { this(); - this.name = name; - this.serializationLib = serializationLib; - this.parameters = parameters; + this.name = org.apache.hive.common.util.HiveStringUtils.intern(name); + this.serializationLib = org.apache.hive.common.util.HiveStringUtils.intern(serializationLib); + this.parameters = org.apache.hive.common.util.HiveStringUtils.intern(parameters); } /** @@ -147,10 +147,10 @@ public class SerDeInfo implements org.ap */ public SerDeInfo(SerDeInfo other) { if (other.isSetName()) { - this.name = other.name; + this.name = org.apache.hive.common.util.HiveStringUtils.intern(other.name); } if (other.isSetSerializationLib()) { - this.serializationLib = other.serializationLib; + this.serializationLib = org.apache.hive.common.util.HiveStringUtils.intern(other.serializationLib); } if (other.isSetParameters()) { Map<String,String> __this__parameters = new HashMap<String,String>(); @@ -159,9 +159,9 @@ public class SerDeInfo implements org.ap String other_element_key = other_element.getKey(); String other_element_value = other_element.getValue(); - String __this__parameters_copy_key = other_element_key; + String __this__parameters_copy_key = org.apache.hive.common.util.HiveStringUtils.intern(other_element_key); - String __this__parameters_copy_value = other_element_value; + String __this__parameters_copy_value = org.apache.hive.common.util.HiveStringUtils.intern(other_element_value); __this__parameters.put(__this__parameters_copy_key, __this__parameters_copy_value); } @@ -185,7 +185,7 @@ public class SerDeInfo implements org.ap } public void setName(String name) { - this.name = name; + this.name = org.apache.hive.common.util.HiveStringUtils.intern(name); } public void unsetName() { @@ -208,7 +208,7 @@ public class SerDeInfo implements org.ap } public void setSerializationLib(String serializationLib) { - this.serializationLib = serializationLib; + this.serializationLib = org.apache.hive.common.util.HiveStringUtils.intern(serializationLib); } public void unsetSerializationLib() { @@ -242,7 +242,7 @@ public class SerDeInfo implements org.ap } public void setParameters(Map<String,String> parameters) { - this.parameters = parameters; + this.parameters = org.apache.hive.common.util.HiveStringUtils.intern(parameters); } public void unsetParameters() { Modified: hive/branches/spark/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java (original) +++ 
hive/branches/spark/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java Sat Sep 20 17:34:39 2014 @@ -216,17 +216,17 @@ public class StorageDescriptor implement { this(); this.cols = cols; - this.location = location; - this.inputFormat = inputFormat; - this.outputFormat = outputFormat; + this.location = org.apache.hive.common.util.HiveStringUtils.intern(location); + this.inputFormat = org.apache.hive.common.util.HiveStringUtils.intern(inputFormat); + this.outputFormat = org.apache.hive.common.util.HiveStringUtils.intern(outputFormat); this.compressed = compressed; setCompressedIsSet(true); this.numBuckets = numBuckets; setNumBucketsIsSet(true); this.serdeInfo = serdeInfo; - this.bucketCols = bucketCols; + this.bucketCols = org.apache.hive.common.util.HiveStringUtils.intern(bucketCols); this.sortCols = sortCols; - this.parameters = parameters; + this.parameters = org.apache.hive.common.util.HiveStringUtils.intern(parameters); } /** @@ -242,13 +242,13 @@ public class StorageDescriptor implement this.cols = __this__cols; } if (other.isSetLocation()) { - this.location = other.location; + this.location = org.apache.hive.common.util.HiveStringUtils.intern(other.location); } if (other.isSetInputFormat()) { - this.inputFormat = other.inputFormat; + this.inputFormat = org.apache.hive.common.util.HiveStringUtils.intern(other.inputFormat); } if (other.isSetOutputFormat()) { - this.outputFormat = other.outputFormat; + this.outputFormat = org.apache.hive.common.util.HiveStringUtils.intern(other.outputFormat); } this.compressed = other.compressed; this.numBuckets = other.numBuckets; @@ -276,9 +276,9 @@ public class StorageDescriptor implement String other_element_key = other_element.getKey(); String other_element_value = other_element.getValue(); - String __this__parameters_copy_key = other_element_key; + String __this__parameters_copy_key = org.apache.hive.common.util.HiveStringUtils.intern(other_element_key); - String __this__parameters_copy_value = other_element_value; + String __this__parameters_copy_value = org.apache.hive.common.util.HiveStringUtils.intern(other_element_value); __this__parameters.put(__this__parameters_copy_key, __this__parameters_copy_value); } @@ -356,7 +356,7 @@ public class StorageDescriptor implement } public void setLocation(String location) { - this.location = location; + this.location = org.apache.hive.common.util.HiveStringUtils.intern(location); } public void unsetLocation() { @@ -379,7 +379,7 @@ public class StorageDescriptor implement } public void setInputFormat(String inputFormat) { - this.inputFormat = inputFormat; + this.inputFormat = org.apache.hive.common.util.HiveStringUtils.intern(inputFormat); } public void unsetInputFormat() { @@ -402,7 +402,7 @@ public class StorageDescriptor implement } public void setOutputFormat(String outputFormat) { - this.outputFormat = outputFormat; + this.outputFormat = org.apache.hive.common.util.HiveStringUtils.intern(outputFormat); } public void unsetOutputFormat() { @@ -507,7 +507,7 @@ public class StorageDescriptor implement } public void setBucketCols(List<String> bucketCols) { - this.bucketCols = bucketCols; + this.bucketCols = org.apache.hive.common.util.HiveStringUtils.intern(bucketCols); } public void unsetBucketCols() { @@ -579,7 +579,7 @@ public class StorageDescriptor implement } public void setParameters(Map<String,String> parameters) { - this.parameters = parameters; + this.parameters = org.apache.hive.common.util.HiveStringUtils.intern(parameters); } public void 
unsetParameters() { Modified: hive/branches/spark/packaging/pom.xml URL: http://svn.apache.org/viewvc/hive/branches/spark/packaging/pom.xml?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/packaging/pom.xml (original) +++ hive/branches/spark/packaging/pom.xml Sat Sep 20 17:34:39 2014 @@ -158,6 +158,12 @@ </dependency> <dependency> <groupId>org.apache.hive</groupId> + <artifactId>hive-jdbc</artifactId> + <version>${project.version}</version> + <classifier>${hive.jdbc.driver.classifier}</classifier> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> <artifactId>hive-beeline</artifactId> <version>${project.version}</version> </dependency> Modified: hive/branches/spark/pom.xml URL: http://svn.apache.org/viewvc/hive/branches/spark/pom.xml?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/pom.xml (original) +++ hive/branches/spark/pom.xml Sat Sep 20 17:34:39 2014 @@ -105,7 +105,7 @@ <commons-exec.version>1.1</commons-exec.version> <commons-httpclient.version>3.0.1</commons-httpclient.version> <commons-io.version>2.4</commons-io.version> - <commons-lang.version>2.4</commons-lang.version> + <commons-lang.version>2.6</commons-lang.version> <commons-lang3.version>3.1</commons-lang3.version> <commons-logging.version>1.1.3</commons-logging.version> <commons-pool.version>1.5.4</commons-pool.version> @@ -137,6 +137,7 @@ <libfb303.version>0.9.0</libfb303.version> <libthrift.version>0.9.0</libthrift.version> <log4j.version>1.2.16</log4j.version> + <opencsv.version>2.3</opencsv.version> <mockito-all.version>1.9.5</mockito-all.version> <mina.version>2.0.0-M5</mina.version> <!--netty is not a direct dependency but due to a change Modified: hive/branches/spark/ql/if/queryplan.thrift URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/if/queryplan.thrift?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/if/queryplan.thrift (original) +++ hive/branches/spark/ql/if/queryplan.thrift Sat Sep 20 17:34:39 2014 @@ -57,6 +57,8 @@ enum OperatorType { MUX, DEMUX, EVENT, + ORCFILEMERGE, + RCFILEMERGE, } struct Operator { Modified: hive/branches/spark/ql/pom.xml URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/pom.xml?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/pom.xml (original) +++ hive/branches/spark/ql/pom.xml Sat Sep 20 17:34:39 2014 @@ -211,6 +211,11 @@ <artifactId>stax-api</artifactId> <version>${stax.version}</version> </dependency> + <dependency> + <groupId>net.sf.opencsv</groupId> + <artifactId>opencsv</artifactId> + <version>${opencsv.version}</version> + </dependency> <!-- test intra-project --> <!-- test inter-project --> <dependency> @@ -576,6 +581,7 @@ <include>com.twitter:parquet-hadoop-bundle</include> <include>org.apache.thrift:libthrift</include> <include>commons-lang:commons-lang</include> + <include>org.apache.commons:commons-lang3</include> <include>org.jodd:jodd-core</include> <include>org.json:json</include> <include>org.apache.avro:avro</include> @@ -593,6 +599,7 @@ <include>org.codehaus.jackson:jackson-core-asl</include> <include>org.codehaus.jackson:jackson-mapper-asl</include> <!--include>com.google.guava:guava</include--> + <include>net.sf.opencsv:opencsv</include> 
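Stepping back to the generated metastore beans above (FieldSchema, Partition, SerDeInfo, StorageDescriptor): every string field now passes through HiveStringUtils.intern, because clients that hold many partitions repeat identical database names, table names, and parameter keys. A minimal illustration of the effect, using a Guava weak interner as a stand-in for HiveStringUtils' actual internals:

import com.google.common.collect.Interner;
import com.google.common.collect.Interners;

public class InternSketch {
  private static final Interner<String> POOL = Interners.newWeakInterner();

  // Null-safe wrapper in the spirit of the intern(...) calls in the diff.
  static String intern(String s) {
    return s == null ? null : POOL.intern(s);
  }

  public static void main(String[] args) {
    String a = intern(new String("default"));
    String b = intern(new String("default"));
    System.out.println(a == b); // true: both fields share one backing instance
  }
}

A weak interner holds no entry alive on its own, so the deduplication does not itself retain memory once the interned strings become unreachable.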
</includes> </artifactSet> <relocations> Modified: hive/branches/spark/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp (original) +++ hive/branches/spark/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp Sat Sep 20 17:34:39 2014 @@ -52,7 +52,9 @@ int _kOperatorTypeValues[] = { OperatorType::PTF, OperatorType::MUX, OperatorType::DEMUX, - OperatorType::EVENT + OperatorType::EVENT, + OperatorType::ORCFILEMERGE, + OperatorType::RCFILEMERGE }; const char* _kOperatorTypeNames[] = { "JOIN", @@ -76,9 +78,11 @@ const char* _kOperatorTypeNames[] = { "PTF", "MUX", "DEMUX", - "EVENT" + "EVENT", + "ORCFILEMERGE", + "RCFILEMERGE" }; -const std::map<int, const char*> _OperatorType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(22, _kOperatorTypeValues, _kOperatorTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); +const std::map<int, const char*> _OperatorType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(24, _kOperatorTypeValues, _kOperatorTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); int _kTaskTypeValues[] = { TaskType::MAP, Modified: hive/branches/spark/ql/src/gen/thrift/gen-cpp/queryplan_types.h URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/gen/thrift/gen-cpp/queryplan_types.h?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/gen/thrift/gen-cpp/queryplan_types.h (original) +++ hive/branches/spark/ql/src/gen/thrift/gen-cpp/queryplan_types.h Sat Sep 20 17:34:39 2014 @@ -57,7 +57,9 @@ struct OperatorType { PTF = 18, MUX = 19, DEMUX = 20, - EVENT = 21 + EVENT = 21, + ORCFILEMERGE = 22, + RCFILEMERGE = 23 }; }; Modified: hive/branches/spark/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java (original) +++ hive/branches/spark/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java Sat Sep 20 17:34:39 2014 @@ -7,6 +7,10 @@ package org.apache.hadoop.hive.ql.plan.api; +import java.util.Map; +import java.util.HashMap; +import org.apache.thrift.TEnum; + public enum OperatorType implements org.apache.thrift.TEnum { JOIN(0), MAPJOIN(1), Modified: hive/branches/spark/ql/src/gen/thrift/gen-php/Types.php URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/gen/thrift/gen-php/Types.php?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/gen/thrift/gen-php/Types.php (original) +++ hive/branches/spark/ql/src/gen/thrift/gen-php/Types.php Sat Sep 20 17:34:39 2014 @@ -57,6 +57,8 @@ final class OperatorType { const MUX = 19; const DEMUX = 20; const EVENT = 21; + const ORCFILEMERGE = 22; + const RCFILEMERGE = 23; static public $__names = array( 0 => 'JOIN', 1 => 'MAPJOIN', @@ -80,6 +82,8 @@ final class OperatorType { 19 => 'MUX', 20 => 'DEMUX', 21 => 'EVENT', + 22 => 
'ORCFILEMERGE', + 23 => 'RCFILEMERGE', ); } Modified: hive/branches/spark/ql/src/gen/thrift/gen-py/queryplan/ttypes.py URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/gen/thrift/gen-py/queryplan/ttypes.py?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/gen/thrift/gen-py/queryplan/ttypes.py (original) +++ hive/branches/spark/ql/src/gen/thrift/gen-py/queryplan/ttypes.py Sat Sep 20 17:34:39 2014 @@ -67,6 +67,8 @@ class OperatorType: MUX = 19 DEMUX = 20 EVENT = 21 + ORCFILEMERGE = 22 + RCFILEMERGE = 23 _VALUES_TO_NAMES = { 0: "JOIN", @@ -91,6 +93,8 @@ class OperatorType: 19: "MUX", 20: "DEMUX", 21: "EVENT", + 22: "ORCFILEMERGE", + 23: "RCFILEMERGE", } _NAMES_TO_VALUES = { @@ -116,6 +120,8 @@ class OperatorType: "MUX": 19, "DEMUX": 20, "EVENT": 21, + "ORCFILEMERGE": 22, + "RCFILEMERGE": 23, } class TaskType: Modified: hive/branches/spark/ql/src/gen/thrift/gen-rb/queryplan_types.rb URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/gen/thrift/gen-rb/queryplan_types.rb?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/gen/thrift/gen-rb/queryplan_types.rb (original) +++ hive/branches/spark/ql/src/gen/thrift/gen-rb/queryplan_types.rb Sat Sep 20 17:34:39 2014 @@ -43,8 +43,10 @@ module OperatorType MUX = 19 DEMUX = 20 EVENT = 21 - VALUE_MAP = {0 => "JOIN", 1 => "MAPJOIN", 2 => "EXTRACT", 3 => "FILTER", 4 => "FORWARD", 5 => "GROUPBY", 6 => "LIMIT", 7 => "SCRIPT", 8 => "SELECT", 9 => "TABLESCAN", 10 => "FILESINK", 11 => "REDUCESINK", 12 => "UNION", 13 => "UDTF", 14 => "LATERALVIEWJOIN", 15 => "LATERALVIEWFORWARD", 16 => "HASHTABLESINK", 17 => "HASHTABLEDUMMY", 18 => "PTF", 19 => "MUX", 20 => "DEMUX", 21 => "EVENT"} - VALID_VALUES = Set.new([JOIN, MAPJOIN, EXTRACT, FILTER, FORWARD, GROUPBY, LIMIT, SCRIPT, SELECT, TABLESCAN, FILESINK, REDUCESINK, UNION, UDTF, LATERALVIEWJOIN, LATERALVIEWFORWARD, HASHTABLESINK, HASHTABLEDUMMY, PTF, MUX, DEMUX, EVENT]).freeze + ORCFILEMERGE = 22 + RCFILEMERGE = 23 + VALUE_MAP = {0 => "JOIN", 1 => "MAPJOIN", 2 => "EXTRACT", 3 => "FILTER", 4 => "FORWARD", 5 => "GROUPBY", 6 => "LIMIT", 7 => "SCRIPT", 8 => "SELECT", 9 => "TABLESCAN", 10 => "FILESINK", 11 => "REDUCESINK", 12 => "UNION", 13 => "UDTF", 14 => "LATERALVIEWJOIN", 15 => "LATERALVIEWFORWARD", 16 => "HASHTABLESINK", 17 => "HASHTABLEDUMMY", 18 => "PTF", 19 => "MUX", 20 => "DEMUX", 21 => "EVENT", 22 => "ORCFILEMERGE", 23 => "RCFILEMERGE"} + VALID_VALUES = Set.new([JOIN, MAPJOIN, EXTRACT, FILTER, FORWARD, GROUPBY, LIMIT, SCRIPT, SELECT, TABLESCAN, FILESINK, REDUCESINK, UNION, UDTF, LATERALVIEWJOIN, LATERALVIEWFORWARD, HASHTABLESINK, HASHTABLEDUMMY, PTF, MUX, DEMUX, EVENT, ORCFILEMERGE, RCFILEMERGE]).freeze end module TaskType Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/Driver.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=1626482&r1=1626481&r2=1626482&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Sat Sep 20 17:34:39 2014 @@ -503,9 +503,11 @@ public class Driver implements CommandPr // get mapping of tables to columns used ColumnAccessInfo colAccessInfo = sem.getColumnAccessInfo(); // colAccessInfo is set 
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Sat Sep 20 17:34:39 2014
@@ -503,9 +503,11 @@ public class Driver implements CommandPr
       // get mapping of tables to columns used
       ColumnAccessInfo colAccessInfo = sem.getColumnAccessInfo();
       // colAccessInfo is set only in case of SemanticAnalyzer
-      Map<String, List<String>> tab2Cols = colAccessInfo != null ? colAccessInfo
+      Map<String, List<String>> selectTab2Cols = colAccessInfo != null ? colAccessInfo
           .getTableToColumnAccessMap() : null;
-      doAuthorizationV2(ss, op, inputs, outputs, command, tab2Cols);
+      Map<String, List<String>> updateTab2Cols = sem.getUpdateColumnAccessInfo() != null ?
+          sem.getUpdateColumnAccessInfo().getTableToColumnAccessMap() : null;
+      doAuthorizationV2(ss, op, inputs, outputs, command, selectTab2Cols, updateTab2Cols);
       return;
     }
     if (op == null) {
@@ -696,7 +698,13 @@ public class Driver implements CommandPr
   }

   private static void doAuthorizationV2(SessionState ss, HiveOperation op, HashSet<ReadEntity> inputs,
-      HashSet<WriteEntity> outputs, String command, Map<String, List<String>> tab2cols) throws HiveException {
+      HashSet<WriteEntity> outputs, String command, Map<String, List<String>> tab2cols,
+      Map<String, List<String>> updateTab2Cols) throws HiveException {
+
+    /* comment for reviewers -> updateTab2Cols needed to be separate from tab2cols because if I
+       pass tab2cols to getHivePrivObjects for the output case it will trip up insert/selects,
+       since the insert will get passed the columns from the select.
+     */
     HiveAuthzContext.Builder authzContextBuilder = new HiveAuthzContext.Builder();

     authzContextBuilder.setUserIpAddress(ss.getUserIpAddress());
@@ -704,7 +712,7 @@ public class Driver implements CommandPr

     HiveOperationType hiveOpType = getHiveOperationType(op);
     List<HivePrivilegeObject> inputsHObjs = getHivePrivObjects(inputs, tab2cols);
-    List<HivePrivilegeObject> outputHObjs = getHivePrivObjects(outputs, null);
+    List<HivePrivilegeObject> outputHObjs = getHivePrivObjects(outputs, updateTab2Cols);

     ss.getAuthorizerV2().checkPrivileges(hiveOpType, inputsHObjs, outputHObjs, authzContextBuilder.build());
   }
@@ -730,12 +738,6 @@ public class Driver implements CommandPr
         //do not authorize temporary uris
         continue;
       }
-      if (privObject instanceof ReadEntity && ((ReadEntity)privObject).isUpdateOrDelete()) {
-        // Skip this one, as we don't want to check select privileges for the table we're reading
-        // for an update or delete.
-        continue;
-      }
-
       //support for authorization on partitions needs to be added
       String dbname = null;
       String objName = null;
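The "comment for reviewers" hunk above is the heart of this change: an UPDATE reads one set of columns (the WHERE and SET expressions) and writes another, so the inputs and outputs must carry separate table-to-column maps, or the output objects inherit the SELECT side's columns. A toy illustration of the two maps (table and column names are hypothetical):

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class UpdateAuthzColumns {
  public static void main(String[] args) {
    // For a hypothetical "UPDATE t SET b = b + 1 WHERE a > 0":
    // the read side touches a and b, the write side only b.
    Map<String, List<String>> selectTab2Cols = new HashMap<String, List<String>>();
    selectTab2Cols.put("default.t", Arrays.asList("a", "b"));

    Map<String, List<String>> updateTab2Cols = new HashMap<String, List<String>>();
    updateTab2Cols.put("default.t", Arrays.asList("b"));

    // Passing selectTab2Cols for the outputs would also demand write
    // privileges on "a", which is read but never modified.
    System.out.println("input columns:  " + selectTab2Cols.get("default.t"));
    System.out.println("output columns: " + updateTab2Cols.get("default.t"));
  }
}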
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java Sat Sep 20 17:34:39 2014
@@ -84,7 +84,8 @@ public enum ErrorMsg {
   INVALID_PATH(10027, "Invalid path"),
   ILLEGAL_PATH(10028, "Path is not legal"),
   INVALID_NUMERICAL_CONSTANT(10029, "Invalid numerical constant"),
-  INVALID_ARRAYINDEX_CONSTANT(10030, "Non-constant expressions for array indexes not supported"),
+  INVALID_ARRAYINDEX_TYPE(10030,
+      "Not proper type for index of ARRAY. Currently, only integer type is supported"),
   INVALID_MAPINDEX_CONSTANT(10031, "Non-constant expression for map indexes not supported"),
   INVALID_MAPINDEX_TYPE(10032, "MAP key type does not match index expression type"),
   NON_COLLECTION_TYPE(10033, "[] not valid on non-collection types"),
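The renamed constant keeps error code 10030 but appears to change what it polices: the old message rejected any non-constant array index, while the new one only rejects indexes that are not integer-typed, so an expression like arr[i] with an integer column i would now be acceptable. The code/message pairing can be checked with the enum's standard accessors; a small sketch (the class name is ours):

import org.apache.hadoop.hive.ql.ErrorMsg;

public class ArrayIndexError {
  public static void main(String[] args) {
    // Same numeric code as the old INVALID_ARRAYINDEX_CONSTANT entry.
    System.out.println(ErrorMsg.INVALID_ARRAYINDEX_TYPE.getErrorCode()); // 10030
    System.out.println(ErrorMsg.INVALID_ARRAYINDEX_TYPE.getMsg());
  }
}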
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java Sat Sep 20 17:34:39 2014
@@ -27,6 +27,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;

+import org.apache.commons.lang3.StringEscapeUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -34,10 +35,9 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
-import org.apache.hadoop.hive.ql.exec.FooterBuffer;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveRecordReader;
@@ -48,7 +48,6 @@ import org.apache.hadoop.hive.ql.plan.Fe
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
-import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
@@ -60,11 +59,8 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.InputFormat;
@@ -82,6 +78,9 @@ public class FetchOperator implements Se
   static Log LOG = LogFactory.getLog(FetchOperator.class.getName());
   static LogHelper console = new LogHelper(LOG);

+  public static final String FETCH_OPERATOR_DIRECTORY_LIST =
+      "hive.complete.dir.list";
+
   private boolean isNativeTable;
   private FetchWork work;
   protected Operator<?> operator; // operator tree for processing row further (option)
@@ -352,6 +351,7 @@ public class FetchOperator implements Se
         }
         return;
       } else {
+        setFetchOperatorContext(job, work.getPartDir());
         iterPath = work.getPartDir().iterator();
         iterPartDesc = work.getPartDesc().iterator();
       }
     }
@@ -380,6 +380,30 @@ public class FetchOperator implements Se
   }

   /**
+   * Set context for this fetch operator in to the jobconf.
+   * This helps InputFormats make decisions based on the scope of the complete
+   * operation.
+   * @param conf the configuration to modify
+   * @param partDirs the list of partition directories
+   */
+  static void setFetchOperatorContext(JobConf conf,
+      ArrayList<Path> partDirs) {
+    if (partDirs != null) {
+      StringBuilder buff = new StringBuilder();
+      boolean first = true;
+      for(Path p: partDirs) {
+        if (first) {
+          first = false;
+        } else {
+          buff.append('\t');
+        }
+        buff.append(StringEscapeUtils.escapeJava(p.toString()));
+      }
+      conf.set(FETCH_OPERATOR_DIRECTORY_LIST, buff.toString());
+    }
+  }
+
+  /**
    * A cache of Object Inspector Settable Properties.
    */
   private static Map<ObjectInspector, Boolean> oiSettableProperties = new HashMap<ObjectInspector, Boolean>();
@@ -748,7 +772,8 @@ public class FetchOperator implements Se
   */
  private FileStatus[] listStatusUnderPath(FileSystem fs, Path p) throws IOException {
    boolean recursive = HiveConf.getBoolVar(job, HiveConf.ConfVars.HADOOPMAPREDINPUTDIRRECURSIVE);
-    if (!recursive) {
+    // If this is in acid format always read it recursively regardless of what the jobconf says.
+    if (!recursive && !AcidUtils.isAcid(p, job)) {
      return fs.listStatus(p);
    }
    List<FileStatus> results = new ArrayList<FileStatus>();
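On the reading side, an InputFormat that wants the full scope of the fetch can split hive.complete.dir.list on the tab separator and undo the Java escaping applied above. A sketch of a hypothetical consumer (only the property name and the escaping scheme come from the patch):

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.hadoop.mapred.JobConf;

public class FetchScopeReader {
  static List<String> completeDirList(JobConf conf) {
    List<String> dirs = new ArrayList<String>();
    String raw = conf.get("hive.complete.dir.list"); // FETCH_OPERATOR_DIRECTORY_LIST
    if (raw != null && !raw.isEmpty()) {
      // Paths were escaped with StringEscapeUtils.escapeJava and joined on '\t',
      // so a literal tab can only be a separator.
      for (String part : raw.split("\t")) {
        dirs.add(StringEscapeUtils.unescapeJava(part));
      }
    }
    return dirs;
  }
}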
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Sat Sep 20 17:34:39 2014
@@ -903,15 +903,15 @@ public final class FunctionRegistry {
           (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b,PrimitiveCategory.STRING);
     }

-    if (FunctionRegistry.implicitConvertable(a, b)) {
+    if (FunctionRegistry.implicitConvertible(a, b)) {
       return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcB);
     }
-    if (FunctionRegistry.implicitConvertable(b, a)) {
+    if (FunctionRegistry.implicitConvertible(b, a)) {
       return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcA);
     }
     for (PrimitiveCategory t : numericTypeList) {
-      if (FunctionRegistry.implicitConvertable(pcA, t)
-          && FunctionRegistry.implicitConvertable(pcB, t)) {
+      if (FunctionRegistry.implicitConvertible(pcA, t)
+          && FunctionRegistry.implicitConvertible(pcB, t)) {
         return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, t);
       }
     }
@@ -955,8 +955,8 @@ public final class FunctionRegistry {
     }

     for (PrimitiveCategory t : numericTypeList) {
-      if (FunctionRegistry.implicitConvertable(pcA, t)
-          && FunctionRegistry.implicitConvertable(pcB, t)) {
+      if (FunctionRegistry.implicitConvertible(pcA, t)
+          && FunctionRegistry.implicitConvertible(pcB, t)) {
         return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, t);
       }
     }
@@ -1007,7 +1007,7 @@ public final class FunctionRegistry {
     return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, commonCat);
   }

-  public static boolean implicitConvertable(PrimitiveCategory from, PrimitiveCategory to) {
+  public static boolean implicitConvertible(PrimitiveCategory from, PrimitiveCategory to) {
     if (from == to) {
       return true;
     }
@@ -1058,7 +1058,7 @@ public final class FunctionRegistry {
    * Returns whether it is possible to implicitly convert an object of Class
    * from to Class to.
    */
-  public static boolean implicitConvertable(TypeInfo from, TypeInfo to) {
+  public static boolean implicitConvertible(TypeInfo from, TypeInfo to) {
     if (from.equals(to)) {
       return true;
     }
@@ -1067,9 +1067,9 @@ public final class FunctionRegistry {
     // 2 TypeInfos from the same qualified type (varchar, decimal) should still be
     // seen as equivalent.
     if (from.getCategory() == Category.PRIMITIVE && to.getCategory() == Category.PRIMITIVE) {
-      return implicitConvertable(
-          ((PrimitiveTypeInfo)from).getPrimitiveCategory(),
-          ((PrimitiveTypeInfo)to).getPrimitiveCategory());
+      return implicitConvertible(
+          ((PrimitiveTypeInfo) from).getPrimitiveCategory(),
+          ((PrimitiveTypeInfo) to).getPrimitiveCategory());
     }
     return false;
   }
@@ -1305,7 +1305,7 @@ public final class FunctionRegistry {
       // but there is a conversion cost.
       return 1;
     }
-    if (!exact && implicitConvertable(argumentPassed, argumentAccepted)) {
+    if (!exact && implicitConvertible(argumentPassed, argumentAccepted)) {
       return 1;
     }
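The rename is purely a spelling fix ("convertable" to "convertible"); every caller in the class is updated in the same hunks. For reference, a usage sketch under Hive's usual widening rules (which allow int to bigint implicitly but not the reverse):

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ConversionCheck {
  public static void main(String[] args) {
    // Widening int -> bigint is implicit.
    System.out.println(FunctionRegistry.implicitConvertible(
        TypeInfoFactory.intTypeInfo, TypeInfoFactory.longTypeInfo));  // true
    // Narrowing bigint -> int is not.
    System.out.println(FunctionRegistry.implicitConvertible(
        TypeInfoFactory.longTypeInfo, TypeInfoFactory.intTypeInfo));  // false
  }
}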
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java Sat Sep 20 17:34:39 2014
@@ -697,6 +697,7 @@ public class SMBMapJoinOperator extends
   // But if hive supports assigning bucket number for each partition, this can be vary
   public void setupContext(List<Path> paths) throws HiveException {
     int segmentLen = paths.size();
+    FetchOperator.setFetchOperatorContext(jobConf, fetchWork.getPartDir());
     FetchOperator[] segments = segmentsForSize(segmentLen);
     for (int i = 0 ; i < segmentLen; i++) {
       Path path = paths.get(i);

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Sat Sep 20 17:34:39 2014
@@ -27,6 +27,7 @@ import org.antlr.runtime.CommonToken;
 import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.lang.WordUtils;
+import org.apache.commons.lang3.StringEscapeUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -2283,13 +2284,15 @@ public final class Utilities {
    *          configuration which receives configured properties
    */
   public static void copyTableJobPropertiesToConf(TableDesc tbl, JobConf job) {
-    String bucketString = tbl.getProperties()
-        .getProperty(hive_metastoreConstants.BUCKET_COUNT);
-    // copy the bucket count
-    if (bucketString != null) {
-      job.set(hive_metastoreConstants.BUCKET_COUNT, bucketString);
+    Properties tblProperties = tbl.getProperties();
+    for(String name: tblProperties.stringPropertyNames()) {
+      if (job.get(name) == null) {
+        String val = (String) tblProperties.get(name);
+        if (val != null) {
+          job.set(name, StringEscapeUtils.escapeJava(val));
+        }
+      }
     }
-
     Map<String, String> jobProperties = tbl.getJobProperties();
     if (jobProperties == null) {
       return;
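Where the old code copied only the bucket count, the new loop pushes every table property into the job conf, Java-escaped and without clobbering keys that are already set. A standalone restatement of the loop's behavior (ours, not the Hive source itself):

import java.util.Properties;

import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.hadoop.mapred.JobConf;

public class TablePropsCopy {
  // Mirrors the new copy loop: existing job conf keys win, values are escaped.
  static void copyProps(Properties tblProperties, JobConf job) {
    for (String name : tblProperties.stringPropertyNames()) {
      if (job.get(name) == null) {
        String val = tblProperties.getProperty(name);
        if (val != null) {
          job.set(name, StringEscapeUtils.escapeJava(val));
        }
      }
    }
  }
}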
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java Sat Sep 20 17:34:39 2014
@@ -850,7 +850,7 @@ public class DagUtils {
       throws IOException {
     FileSystem destFS = dest.getFileSystem(conf);

-    if (src != null) {
+    if (src != null && checkPreExisting(src, dest, conf) == false) {
       // copy the src to the destination and create local resource.
       // do not overwrite.
       LOG.info("Localizing resource because it does not exist: " + src + " to dest: " + dest);

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java Sat Sep 20 17:34:39 2014
@@ -141,8 +141,7 @@ public class TezJobMonitor {
         case RUNNING:
           if (!running) {
             perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
-            console.printInfo("Status: Running (application id: "
-                +dagClient.getExecutionContext()+")\n");
+            console.printInfo("Status: Running (" + dagClient.getExecutionContext() + ")\n");
             for (String s: progressMap.keySet()) {
               perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_VERTEX + s);
             }

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Sat Sep 20 17:34:39 2014
@@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFAvgDecimal;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCount;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountMerge;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumDecimal;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDouble;
@@ -1898,7 +1899,7 @@ public class VectorizationContext {
     add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMaxDecimal.class));
     add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.NONE, GroupByDesc.Mode.HASH, VectorUDAFCountStar.class));
     add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
-    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFSumLong.class));
+    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFCountMerge.class));
     add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
     add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
     add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFCount.class));

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java?rev=1626482&r1=1626481&r2=1626482&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java Sat Sep 20 17:34:39 2014
@@ -660,7 +660,7 @@ public final class VectorExpressionWrite
       @Override
       public Object writeValue(byte[] value, int start, int length) throws HiveException {
         this.text.set(value, start, length);
-        ((SettableStringObjectInspector) this.objectInspector).set(this.obj, this.text.toString());
+        ((SettableStringObjectInspector) this.objectInspector).set(this.obj, this.text);
         return this.obj;
       }

@@ -671,7 +671,7 @@ public final class VectorExpressionWrite
           field = initValue(null);
         }
         this.text.set(value, start, length);
-        ((SettableStringObjectInspector) this.objectInspector).set(field, this.text.toString());
+        ((SettableStringObjectInspector) this.objectInspector).set(field, this.text);
         return field;
       }
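The final two hunks pass the reused Text buffer straight to the settable inspector instead of calling toString(), which removes a String allocation per written value; SettableStringObjectInspector declares set overloads for both Text and String. A standalone sketch of the pattern using the stock writable-string inspector (variable names are ours):

import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector;
import org.apache.hadoop.io.Text;

public class TextReuse {
  public static void main(String[] args) {
    SettableStringObjectInspector oi =
        PrimitiveObjectInspectorFactory.writableStringObjectInspector;
    Text text = new Text();          // one reusable buffer per writer
    Object obj = oi.create("");      // settable backing object
    byte[] value = "hello".getBytes();
    text.set(value, 0, value.length);
    obj = oi.set(obj, text);         // Text overload: no intermediate String
    System.out.println(oi.getPrimitiveJavaObject(obj));
  }
}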
