OOZIE-1457 Create a Hive Server 2 action (rkanter)
Project: http://git-wip-us.apache.org/repos/asf/oozie/repo Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/e332299d Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/e332299d Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/e332299d Branch: refs/remotes/trunk Commit: e332299dc86298e5f6e457f12cc089c354d8db3f Parents: 87fbc84 Author: Robert Kanter <[email protected]> Authored: Thu Sep 11 13:10:16 2014 -0700 Committer: Robert Kanter <[email protected]> Committed: Thu Sep 11 13:10:16 2014 -0700 ---------------------------------------------------------------------- .../java/org/apache/oozie/cli/OozieCLI.java | 2 + client/src/main/resources/hive2-action-0.1.xsd | 71 +++ core/pom.xml | 6 + core/src/main/conf/oozie-site.xml | 5 +- .../action/hadoop/Hive2ActionExecutor.java | 136 +++++ .../oozie/action/hadoop/Hive2Credentials.java | 79 +++ .../java/org/apache/oozie/test/XTestCase.java | 28 + .../oozie/test/hive/AbstractHiveService.java | 161 ++++++ .../org/apache/oozie/test/hive/MiniHS2.java | 212 ++++++++ .../site/twiki/DG_Hive2ActionExtension.twiki | 212 ++++++++ .../twiki/DG_UnifiedCredentialsModule.twiki | 11 +- docs/src/site/twiki/index.twiki | 1 + pom.xml | 30 ++ release-log.txt | 1 + sharelib/hive2/pom.xml | 524 +++++++++++++++++++ .../apache/oozie/action/hadoop/Hive2Main.java | 237 +++++++++ .../action/hadoop/TestHive2ActionExecutor.java | 227 ++++++++ sharelib/pom.xml | 1 + src/main/assemblies/sharelib.xml | 4 + webapp/pom.xml | 8 +- 20 files changed, 1951 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/oozie/blob/e332299d/client/src/main/java/org/apache/oozie/cli/OozieCLI.java ---------------------------------------------------------------------- diff --git a/client/src/main/java/org/apache/oozie/cli/OozieCLI.java b/client/src/main/java/org/apache/oozie/cli/OozieCLI.java index 79a9b68..f3ffd1f 100644 --- 
a/client/src/main/java/org/apache/oozie/cli/OozieCLI.java +++ b/client/src/main/java/org/apache/oozie/cli/OozieCLI.java @@ -1818,6 +1818,8 @@ public class OozieCLI { "ssh-action-0.1.xsd"))); sources.add(new StreamSource(Thread.currentThread().getContextClassLoader().getResourceAsStream( "ssh-action-0.2.xsd"))); + sources.add(new StreamSource(Thread.currentThread().getContextClassLoader().getResourceAsStream( + "hive2-action-0.1.xsd"))); SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); Schema schema = factory.newSchema(sources.toArray(new StreamSource[sources.size()])); Validator validator = schema.newValidator(); http://git-wip-us.apache.org/repos/asf/oozie/blob/e332299d/client/src/main/resources/hive2-action-0.1.xsd ---------------------------------------------------------------------- diff --git a/client/src/main/resources/hive2-action-0.1.xsd b/client/src/main/resources/hive2-action-0.1.xsd new file mode 100644 index 0000000..879f2e0 --- /dev/null +++ b/client/src/main/resources/hive2-action-0.1.xsd @@ -0,0 +1,71 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + xmlns:hive2="uri:oozie:hive2-action:0.1" elementFormDefault="qualified" + targetNamespace="uri:oozie:hive2-action:0.1"> + + <xs:element name="hive2" type="hive2:ACTION"/> + + <xs:complexType name="ACTION"> + <xs:sequence> + <xs:element name="job-tracker" type="xs:string" minOccurs="0" maxOccurs="1"/> + <xs:element name="name-node" type="xs:string" minOccurs="0" maxOccurs="1"/> + <xs:element name="prepare" type="hive2:PREPARE" minOccurs="0" maxOccurs="1"/> + <xs:element name="job-xml" type="xs:string" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="configuration" type="hive2:CONFIGURATION" minOccurs="0" maxOccurs="1"/> + <xs:element name="jdbc-url" type="xs:string" minOccurs="1" maxOccurs="1"/> + <xs:element name="password" type="xs:string" minOccurs="0" maxOccurs="1"/> + <xs:element name="script" type="xs:string" minOccurs="1" maxOccurs="1"/> + <xs:element name="param" type="xs:string" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="argument" type="xs:string" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="file" type="xs:string" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="archive" type="xs:string" minOccurs="0" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + + <xs:complexType name="CONFIGURATION"> + <xs:sequence> + <xs:element name="property" minOccurs="1" maxOccurs="unbounded"> + <xs:complexType> + <xs:sequence> + <xs:element name="name" minOccurs="1" maxOccurs="1" type="xs:string"/> + <xs:element name="value" minOccurs="1" maxOccurs="1" type="xs:string"/> + <xs:element name="description" minOccurs="0" maxOccurs="1" type="xs:string"/> + </xs:sequence> + </xs:complexType> + </xs:element> + </xs:sequence> + </xs:complexType> + + <xs:complexType name="PREPARE"> + <xs:sequence> + <xs:element name="delete" type="hive2:DELETE" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="mkdir" type="hive2:MKDIR" minOccurs="0" maxOccurs="unbounded"/> + 
</xs:sequence> + </xs:complexType> + + <xs:complexType name="DELETE"> + <xs:attribute name="path" type="xs:string" use="required"/> + </xs:complexType> + + <xs:complexType name="MKDIR"> + <xs:attribute name="path" type="xs:string" use="required"/> + </xs:complexType> + +</xs:schema> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/oozie/blob/e332299d/core/pom.xml ---------------------------------------------------------------------- diff --git a/core/pom.xml b/core/pom.xml index 59b6ecd..7cd1f70 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -361,6 +361,12 @@ </exclusion> </exclusions> </dependency> + + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-jdbc</artifactId> + <scope>provided</scope> + </dependency> </dependencies> <build> http://git-wip-us.apache.org/repos/asf/oozie/blob/e332299d/core/src/main/conf/oozie-site.xml ---------------------------------------------------------------------- diff --git a/core/src/main/conf/oozie-site.xml b/core/src/main/conf/oozie-site.xml index 32f87f5..3b71760 100644 --- a/core/src/main/conf/oozie-site.xml +++ b/core/src/main/conf/oozie-site.xml @@ -30,7 +30,8 @@ org.apache.oozie.action.hadoop.HiveActionExecutor, org.apache.oozie.action.hadoop.ShellActionExecutor, org.apache.oozie.action.hadoop.SqoopActionExecutor, - org.apache.oozie.action.hadoop.DistcpActionExecutor + org.apache.oozie.action.hadoop.DistcpActionExecutor, + org.apache.oozie.action.hadoop.HiveServer2ActionExecutor </value> </property> @@ -40,7 +41,7 @@ shell-action-0.1.xsd,shell-action-0.2.xsd,shell-action-0.3.xsd,email-action-0.1.xsd,email-action-0.2.xsd, hive-action-0.2.xsd,hive-action-0.3.xsd,hive-action-0.4.xsd,hive-action-0.5.xsd,sqoop-action-0.2.xsd, sqoop-action-0.3.xsd,sqoop-action-0.4.xsd,ssh-action-0.1.xsd,ssh-action-0.2.xsd,distcp-action-0.1.xsd, - distcp-action-0.2.xsd,oozie-sla-0.1.xsd,oozie-sla-0.2.xsd + distcp-action-0.2.xsd,oozie-sla-0.1.xsd,oozie-sla-0.2.xsd,hive2-action-0.1.xsd </value> </property> 
http://git-wip-us.apache.org/repos/asf/oozie/blob/e332299d/core/src/main/java/org/apache/oozie/action/hadoop/Hive2ActionExecutor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/oozie/action/hadoop/Hive2ActionExecutor.java b/core/src/main/java/org/apache/oozie/action/hadoop/Hive2ActionExecutor.java new file mode 100644 index 0000000..7fec585 --- /dev/null +++ b/core/src/main/java/org/apache/oozie/action/hadoop/Hive2ActionExecutor.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.oozie.action.hadoop; + +import static org.apache.oozie.action.hadoop.LauncherMapper.CONF_OOZIE_ACTION_MAIN_CLASS; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.oozie.action.ActionExecutorException; +import org.apache.oozie.client.WorkflowAction; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.Namespace; + +public class Hive2ActionExecutor extends ScriptLanguageActionExecutor { + + private static final String HIVE2_MAIN_CLASS_NAME = "org.apache.oozie.action.hadoop.Hive2Main"; + static final String HIVE2_JDBC_URL = "oozie.hive2.jdbc.url"; + static final String HIVE2_PASSWORD = "oozie.hive2.password"; + static final String HIVE2_SCRIPT = "oozie.hive2.script"; + static final String HIVE2_PARAMS = "oozie.hive2.params"; + static final String HIVE2_ARGS = "oozie.hive2.args"; + + public Hive2ActionExecutor() { + super("hive2"); + } + + @Override + public List<Class> getLauncherClasses() { + List<Class> classes = new ArrayList<Class>(); + try { + classes.add(Class.forName(HIVE2_MAIN_CLASS_NAME)); + } + catch (ClassNotFoundException e) { + throw new RuntimeException("Class not found", e); + } + return classes; + } + + @Override + protected String getLauncherMain(Configuration launcherConf, Element actionXml) { + return launcherConf.get(CONF_OOZIE_ACTION_MAIN_CLASS, HIVE2_MAIN_CLASS_NAME); + } + + @Override + @SuppressWarnings("unchecked") + Configuration setupActionConf(Configuration actionConf, Context context, Element actionXml, + Path appPath) throws ActionExecutorException { + Configuration conf = super.setupActionConf(actionConf, context, actionXml, appPath); + Namespace ns = actionXml.getNamespace(); + + String jdbcUrl = actionXml.getChild("jdbc-url", ns).getTextTrim(); + + String password = null; + Element passwordElement = actionXml.getChild("password", ns); + if (passwordElement != null) { + password = 
actionXml.getChild("password", ns).getTextTrim(); + } + + String script = actionXml.getChild("script", ns).getTextTrim(); + String scriptName = new Path(script).getName(); + String beelineScriptContent = context.getProtoActionConf().get(HIVE2_SCRIPT); + + if (beelineScriptContent == null){ + addToCache(conf, appPath, script + "#" + scriptName, false); + } + + List<Element> params = (List<Element>) actionXml.getChildren("param", ns); + String[] strParams = new String[params.size()]; + for (int i = 0; i < params.size(); i++) { + strParams[i] = params.get(i).getTextTrim(); + } + String[] strArgs = null; + List<Element> eArgs = actionXml.getChildren("argument", ns); + if (eArgs != null && eArgs.size() > 0) { + strArgs = new String[eArgs.size()]; + for (int i = 0; i < eArgs.size(); i++) { + strArgs[i] = eArgs.get(i).getTextTrim(); + } + } + + setHive2Props(conf, jdbcUrl, password, scriptName, strParams, strArgs); + return conf; + } + + public static void setHive2Props(Configuration conf, String jdbcUrl, String password, String script, String[] params, + String[] args) { + conf.set(HIVE2_JDBC_URL, jdbcUrl); + if (password != null) { + conf.set(HIVE2_PASSWORD, password); + } + conf.set(HIVE2_SCRIPT, script); + MapReduceMain.setStrings(conf, HIVE2_PARAMS, params); + MapReduceMain.setStrings(conf, HIVE2_ARGS, args); + } + + @Override + protected boolean getCaptureOutput(WorkflowAction action) throws JDOMException { + return false; + } + + /** + * Return the sharelib name for the action. + * + * @return returns <code>hive2</code>. 
+ * @param actionXml + */ + @Override + protected String getDefaultShareLibName(Element actionXml) { + return "hive2"; + } + + @Override + protected String getScriptName() { + return HIVE2_SCRIPT; + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/oozie/blob/e332299d/core/src/main/java/org/apache/oozie/action/hadoop/Hive2Credentials.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/oozie/action/hadoop/Hive2Credentials.java b/core/src/main/java/org/apache/oozie/action/hadoop/Hive2Credentials.java new file mode 100644 index 0000000..195956c --- /dev/null +++ b/core/src/main/java/org/apache/oozie/action/hadoop/Hive2Credentials.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.oozie.action.hadoop; + +import java.sql.Connection; +import java.sql.DriverManager; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.security.token.Token; +import org.apache.hive.jdbc.HiveConnection; +import org.apache.oozie.ErrorCode; +import org.apache.oozie.action.ActionExecutor.Context; +import org.apache.oozie.util.XLog; + +/** + * Credentials implementation to store in jobConf, Hive Server 2 specific properties + * User specifies these credential properties along with the action configuration + * The jobConf is used further to pass credentials to the tasks while running + * Oozie server should be configured to use this class by including it via property 'oozie.credentials.credentialclasses' + * User can extend the parent class to implement own class as well + * for handling custom token-based credentials and add to the above server property + */ +public class Hive2Credentials extends Credentials { + + private static final String USER_NAME = "user.name"; + private static final String HIVE2_JDBC_URL = "hive2.jdbc.url"; + private static final String HIVE2_SERVER_PRINCIPAL = "hive2.server.principal"; + + @Override + public void addtoJobConf(JobConf jobconf, CredentialsProperties props, Context context) throws Exception { + try { + // load the driver + Class.forName("org.apache.hive.jdbc.HiveDriver"); + + String url = props.getProperties().get(HIVE2_JDBC_URL); + if (url == null || url.isEmpty()) { + throw new CredentialException(ErrorCode.E0510, + HIVE2_JDBC_URL + " is required to get hive server 2 credential"); + } + String principal = props.getProperties().get(HIVE2_SERVER_PRINCIPAL); + if (principal == null || principal.isEmpty()) { + throw new CredentialException(ErrorCode.E0510, + HIVE2_SERVER_PRINCIPAL + " is required to get hive server 2 credential"); + } + url = url + ";principal=" + principal; + 
Connection con = DriverManager.getConnection(url); + XLog.getLog(getClass()).debug("Connected successfully to " + url); + // get delegation token for the given proxy user + String tokenStr = ((HiveConnection)con).getDelegationToken(jobconf.get(USER_NAME), principal); + XLog.getLog(getClass()).debug("Got token"); + con.close(); + + Token<DelegationTokenIdentifier> hive2Token = new Token<DelegationTokenIdentifier>(); + hive2Token.decodeFromUrlString(tokenStr); + jobconf.getCredentials().addToken(new Text("hive.server2.delegation.token"), hive2Token); + XLog.getLog(getClass()).debug("Added the Hive Server 2 token in job conf"); + } + catch (Exception e) { + XLog.getLog(getClass()).warn("Exception in addtoJobConf", e); + throw e; + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/oozie/blob/e332299d/core/src/test/java/org/apache/oozie/test/XTestCase.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/oozie/test/XTestCase.java b/core/src/test/java/org/apache/oozie/test/XTestCase.java index 0dd7d71..09631e0 100644 --- a/core/src/test/java/org/apache/oozie/test/XTestCase.java +++ b/core/src/test/java/org/apache/oozie/test/XTestCase.java @@ -50,6 +50,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MiniMRCluster; import org.apache.hadoop.security.authorize.ProxyUsers; @@ -80,6 +81,7 @@ import org.apache.oozie.sla.SLASummaryBean; import org.apache.oozie.store.CoordinatorStore; import org.apache.oozie.store.StoreException; import org.apache.oozie.test.MiniHCatServer.RUNMODE; +import org.apache.oozie.test.hive.MiniHS2; import org.apache.oozie.util.IOUtils; import org.apache.oozie.util.ParamChecker; import 
org.apache.oozie.util.XConfiguration; @@ -412,6 +414,9 @@ public abstract class XTestCase extends TestCase { */ @Override protected void tearDown() throws Exception { + if (hiveserver2 != null) { + hiveserver2.stop(); + } resetSystemProperties(); sysProps = null; testCaseDir = null; @@ -873,6 +878,7 @@ public abstract class XTestCase extends TestCase { private static MiniDFSCluster dfsCluster2 = null; private static MiniMRCluster mrCluster = null; private static MiniHCatServer hcatServer = null; + private static MiniHS2 hiveserver2 = null; private void setUpEmbeddedHadoop(String testCaseDir) throws Exception { if (dfsCluster == null && mrCluster == null) { @@ -991,6 +997,28 @@ public abstract class XTestCase extends TestCase { } } + protected void setupHiveServer2() throws Exception { + if (hiveserver2 == null) { + setSystemProperty("test.tmp.dir", getTestCaseDir()); + // Make HS2 use our Mini cluster by copying all configs to HiveConf; also had to hack MiniHS2 + HiveConf hconf = new HiveConf(); + Configuration jobConf = createJobConf(); + for (Map.Entry<String, String> pair: jobConf) { + hconf.set(pair.getKey(), pair.getValue()); + } + hiveserver2 = new MiniHS2(hconf, dfsCluster.getFileSystem()); + hiveserver2.start(new HashMap<String, String>()); + } + } + + protected String getHiveServer2JdbcURL() { + return hiveserver2.getJdbcURL(); + } + + protected String getHiveServer2JdbcURL(String dbName) { + return hiveserver2.getJdbcURL(dbName); + } + private static void shutdownMiniCluster() { try { if (mrCluster != null) { http://git-wip-us.apache.org/repos/asf/oozie/blob/e332299d/core/src/test/java/org/apache/oozie/test/hive/AbstractHiveService.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/oozie/test/hive/AbstractHiveService.java b/core/src/test/java/org/apache/oozie/test/hive/AbstractHiveService.java new file mode 100644 index 0000000..f97485e --- /dev/null +++ 
b/core/src/test/java/org/apache/oozie/test/hive/AbstractHiveService.java @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.oozie.test.hive; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; + +// TODO: This class and MiniHS2 are copied from the org.apache.hive.jdbc package in Hive 13.1; we can remove them once +// Hive publishes its "hive-it-unit" artifact to maven. +/*** + * Base class for Hive service + * AbstractHiveService. 
+ * + */ +public abstract class AbstractHiveService { + private HiveConf hiveConf = null; + private String hostname; + private int binaryPort; + private int httpPort; + private boolean startedHiveService = false; + private List<String> addedProperties = new ArrayList<String>(); + + public AbstractHiveService(HiveConf hiveConf, String hostname, int binaryPort, int httpPort) { + this.hiveConf = hiveConf; + this.hostname = hostname; + this.binaryPort = binaryPort; + this.httpPort = httpPort; + } + + /** + * Get Hive conf + * @return + */ + public HiveConf getHiveConf() { + return hiveConf; + } + + /** + * Get config property + * @param propertyKey + * @return + */ + public String getConfProperty(String propertyKey) { + return hiveConf.get(propertyKey); + } + + /** + * Set config property + * @param propertyKey + * @param propertyValue + */ + public void setConfProperty(String propertyKey, String propertyValue) { + System.setProperty(propertyKey, propertyValue); + hiveConf.set(propertyKey, propertyValue); + addedProperties.add(propertyKey); + } + + /** + * Create system properties set by this server instance. This ensures that + * the changes made by current test are not impacting subsequent tests. 
+ */ + public void clearProperties() { + for (String propKey : addedProperties ) { + System.clearProperty(propKey); + } + } + + /** + * Retrieve warehouse directory + * @return + */ + public Path getWareHouseDir() { + return new Path(hiveConf.getVar(ConfVars.METASTOREWAREHOUSE)); + } + + public void setWareHouseDir(String wareHouseURI) { + verifyNotStarted(); + System.setProperty(ConfVars.METASTOREWAREHOUSE.varname, wareHouseURI); + hiveConf.setVar(ConfVars.METASTOREWAREHOUSE, wareHouseURI); + } + + /** + * Set service host + * @param hostName + */ + public void setHost(String hostName) { + this.hostname = hostName; + } + + // get service host + protected String getHost() { + return hostname; + } + + /** + * Set binary service port # + * @param portNum + */ + public void setBinaryPort(int portNum) { + this.binaryPort = portNum; + } + + /** + * Set http service port # + * @param portNum + */ + public void setHttpPort(int portNum) { + this.httpPort = portNum; + } + + // Get binary service port # + protected int getBinaryPort() { + return binaryPort; + } + + // Get http service port # + protected int getHttpPort() { + return httpPort; + } + + public boolean isStarted() { + return startedHiveService; + } + + protected void setStarted(boolean hiveServiceStatus) { + this.startedHiveService = hiveServiceStatus; + } + + protected void verifyStarted() { + if (!isStarted()) { + throw new IllegalStateException("HiveServer2 is not running"); + } + } + + protected void verifyNotStarted() { + if (isStarted()) { + throw new IllegalStateException("HiveServer2 already running"); + } + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/oozie/blob/e332299d/core/src/test/java/org/apache/oozie/test/hive/MiniHS2.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/oozie/test/hive/MiniHS2.java b/core/src/test/java/org/apache/oozie/test/hive/MiniHS2.java new file mode 100644 index 0000000..9b4c400 --- 
/dev/null +++ b/core/src/test/java/org/apache/oozie/test/hive/MiniHS2.java @@ -0,0 +1,212 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.oozie.test.hive; + +import static org.junit.Assert.assertNotNull; + +import java.io.File; +import java.io.IOException; +import java.util.Map; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.HiveMetaStore; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.shims.HadoopShims.MiniDFSShim; +import org.apache.hadoop.hive.shims.HadoopShims.MiniMrShim; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hive.service.Service; +import org.apache.hive.service.cli.CLIServiceClient; +import org.apache.hive.service.cli.SessionHandle; +import org.apache.hive.service.cli.thrift.ThriftBinaryCLIService; +import org.apache.hive.service.cli.thrift.ThriftCLIServiceClient; +import 
org.apache.hive.service.cli.thrift.ThriftHttpCLIService; +import org.apache.hive.service.server.HiveServer2; + +import com.google.common.io.Files; + +// TODO: This class and AbstractHiveService are copied from the org.apache.hive.jdbc package in Hive 13.1; we can remove them once +// Hive publishes its "hive-it-unit" artifact to maven. +// TODO: We also had to make some changes to the constructor to get this to work properly; hopefully by the time we pick up MiniHS2 +// from Hive itself they'll have fixed it +public class MiniHS2 extends AbstractHiveService { + private static final String driverName = "org.apache.hive.jdbc.HiveDriver"; + private HiveServer2 hiveServer2 = null; + private final File baseDir; + private final Path baseDfsDir; + private static final AtomicLong hs2Counter = new AtomicLong(); + private static final String HS2_BINARY_MODE = "binary"; + private static final String HS2_HTTP_MODE = "http"; + private MiniMrShim mr; + private MiniDFSShim dfs; + + public MiniHS2(HiveConf hiveConf, FileSystem fs) throws IOException { + // MiniHS2 normally only lets you do either "local" mode or normal mode. We couldn't use "local" mode because it forks out + // a process to run a shell script (that we don't have) to run Hadoop jobs. And we didn't want to use normal mode because that + // creates Mini MR and DFS clusters, which we already have setup for Oozie. Our hacking here involved deleting the Hive Mini + // MR/DFS cluster code and passing in our jobConf in the hiveConf so that HS2 would use our Mini MR/DFS cluster. 
+ super(hiveConf, "localhost", MetaStoreUtils.findFreePort(), MetaStoreUtils.findFreePort()); + baseDir = Files.createTempDir(); + baseDfsDir = new Path(new Path(fs.getUri()), "/base"); + String metaStoreURL = "jdbc:derby:" + baseDir.getAbsolutePath() + File.separator + "test_metastore-" + + hs2Counter.incrementAndGet() + ";create=true"; + + fs.mkdirs(baseDfsDir); + Path wareHouseDir = new Path(baseDfsDir, "warehouse"); + fs.mkdirs(wareHouseDir); + setWareHouseDir(wareHouseDir.toString()); + System.setProperty(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, metaStoreURL); + hiveConf.setVar(HiveConf.ConfVars.METASTORECONNECTURLKEY, metaStoreURL); + // reassign a new port, just in case if one of the MR services grabbed the last one + setBinaryPort(MetaStoreUtils.findFreePort()); + hiveConf.setVar(ConfVars.HIVE_SERVER2_TRANSPORT_MODE, HS2_BINARY_MODE); + hiveConf.setVar(ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST, getHost()); + hiveConf.setIntVar(ConfVars.HIVE_SERVER2_THRIFT_PORT, getBinaryPort()); + hiveConf.setIntVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT, getHttpPort()); + HiveMetaStore.HMSHandler.resetDefaultDBFlag(); + + Path scratchDir = new Path(baseDfsDir, "scratch"); + fs.mkdirs(scratchDir); + System.setProperty(HiveConf.ConfVars.SCRATCHDIR.varname, scratchDir.toString()); + System.setProperty(HiveConf.ConfVars.LOCALSCRATCHDIR.varname, + baseDir.getPath() + File.separator + "scratch"); + } + + public void start(Map<String, String> confOverlay) throws Exception { + hiveServer2 = new HiveServer2(); + // Set confOverlay parameters + for (Map.Entry<String, String> entry : confOverlay.entrySet()) { + setConfProperty(entry.getKey(), entry.getValue()); + } + hiveServer2.init(getHiveConf()); + hiveServer2.start(); + waitForStartup(); + setStarted(true); + } + + public void stop() { + verifyStarted(); + hiveServer2.stop(); + setStarted(false); + try { + if (mr != null) { + mr.shutdown(); + mr = null; + } + if (dfs != null) { + dfs.shutdown(); + dfs = null; + } + } catch 
(IOException e) { + // Ignore errors cleaning up miniMR + } + FileUtils.deleteQuietly(baseDir); + } + + public CLIServiceClient getServiceClient() { + verifyStarted(); + return getServiceClientInternal(); + } + + public CLIServiceClient getServiceClientInternal() { + for (Service service : hiveServer2.getServices()) { + if (service instanceof ThriftBinaryCLIService) { + return new ThriftCLIServiceClient((ThriftBinaryCLIService) service); + } + if (service instanceof ThriftHttpCLIService) { + return new ThriftCLIServiceClient((ThriftHttpCLIService) service); + } + } + throw new IllegalStateException("HiveServer2 not running Thrift service"); + } + + /** + * return connection URL for this server instance + * @return + */ + public String getJdbcURL() { + return getJdbcURL("default"); + } + + /** + * return connection URL for this server instance + * @param dbName - DB name to be included in the URL + * @return + */ + public String getJdbcURL(String dbName) { + return getJdbcURL(dbName, ""); + } + + /** + * return connection URL for this server instance + * @param dbName - DB name to be included in the URL + * @param urlExtension - Addional string to be appended to URL + * @return + */ + public String getJdbcURL(String dbName, String urlExtension) { + assertNotNull("URL extension shouldn't be null", urlExtension); + String transportMode = getConfProperty(ConfVars.HIVE_SERVER2_TRANSPORT_MODE.varname); + if(transportMode != null && (transportMode.equalsIgnoreCase(HS2_HTTP_MODE))) { + return "jdbc:hive2://" + getHost() + ":" + getHttpPort() + "/" + dbName; + } + else { + return "jdbc:hive2://" + getHost() + ":" + getBinaryPort() + "/" + dbName + urlExtension; + } + } + + public static String getJdbcDriverName() { + return driverName; + } + + public MiniMrShim getMR() { + return mr; + } + + public MiniDFSShim getDFS() { + return dfs; + } + + private void waitForStartup() throws Exception { + int waitTime = 0; + long startupTimeout = 1000L * 1000000000L; + CLIServiceClient 
hs2Client = getServiceClientInternal(); + SessionHandle sessionHandle = null; + do { + Thread.sleep(500L); + waitTime += 500L; + if (waitTime > startupTimeout) { + throw new TimeoutException("Couldn't access new HiveServer2: " + getJdbcURL()); + } + try { + sessionHandle = hs2Client.openSession("foo", "bar"); + } catch (Exception e) { + // service not started yet + continue; + } + hs2Client.closeSession(sessionHandle); + break; + } while (true); + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/oozie/blob/e332299d/docs/src/site/twiki/DG_Hive2ActionExtension.twiki ---------------------------------------------------------------------- diff --git a/docs/src/site/twiki/DG_Hive2ActionExtension.twiki b/docs/src/site/twiki/DG_Hive2ActionExtension.twiki new file mode 100644 index 0000000..e09040d --- /dev/null +++ b/docs/src/site/twiki/DG_Hive2ActionExtension.twiki @@ -0,0 +1,212 @@ +<noautolink> + +[[index][::Go back to Oozie Documentation Index::]] + +----- + +---+!! Oozie Hive 2 Action Extension + +%TOC% + +---++ Hive 2 Action + +The =hive2= action runs Beeline to connect to Hive Server 2. + +The workflow job will wait until the Hive Server 2 job completes before +continuing to the next action. + +To run the Hive Server 2 job, you have to configure the =hive2= action with the +=job-tracker=, =name-node=, =jdbc-url=, =password=, and Hive =script= elements as +well as the necessary parameters and configuration. + +A =hive2= action can be configured to create or delete HDFS directories +before starting the Hive Server 2 job. + +Oozie EL expressions can be used in the inline configuration. Property +values specified in the =configuration= element override values specified +in the =job-xml= file. + +As with Hadoop =map-reduce= jobs, it is possible to add files and +archives in order to make them available to Beeline. 
Refer to the +[WorkflowFunctionalSpec#FilesAchives][Adding Files and Archives for the Job] +section for more information about this feature. + +Oozie Hive 2 action supports Hive scripts with parameter variables, their +syntax is =${VARIABLES}=. + +*Syntax:* + +<verbatim> +<workflow-app name="[WF-DEF-NAME]" xmlns="uri:oozie:workflow:0.4"> + ... + <action name="[NODE-NAME]"> + <hive2 xmlns="uri:oozie:hive2-action:0.1"> + <job-tracker>[JOB-TRACKER]</job-tracker> + <name-node>[NAME-NODE]</name-node> + <prepare> + <delete path="[PATH]"/> + ... + <mkdir path="[PATH]"/> + ... + </prepare> + <job-xml>[HIVE SETTINGS FILE]</job-xml> + <configuration> + <property> + <name>[PROPERTY-NAME]</name> + <value>[PROPERTY-VALUE]</value> + </property> + ... + </configuration> + <jdbc-url>[jdbc:hive2://HOST:10000/default]</jdbc-url> + <password>[PASS]</password> + <script>[HIVE-SCRIPT]</script> + <param>[PARAM-VALUE]</param> + ... + <param>[PARAM-VALUE]</param> + <argument>[ARG-VALUE]</argument> + ... + <argument>[ARG-VALUE]</argument> + <file>[FILE-PATH]</file> + ... + <archive>[FILE-PATH]</archive> + ... + </hive2> + <ok to="[NODE-NAME]"/> + <error to="[NODE-NAME]"/> + </action> + ... +</workflow-app> +</verbatim> + +The =prepare= element, if present, indicates a list of paths to delete +or create before starting the job. Specified paths must start with =hdfs://HOST:PORT=. + +The =job-xml= element, if present, specifies a file containing configuration +for Beeline. Multiple =job-xml= elements are allowed in order to specify multiple =job.xml= files. + +The =configuration= element, if present, contains configuration +properties that are passed to the Beeline job. + +The =jdbc-url= element must contain the JDBC URL for the Hive Server 2. Beeline will use this to know where to connect to. + +The =password= element must contain the password of the current user. However, the =password= is only used if Hive Server 2 is +backed by something requiring a password (e.g. 
LDAP); non-secured Hive Server 2 or Kerberized Hive Server 2 don't require a password +so in those cases the =password= is ignored and can be omitted from the action XML. It is up to the user to ensure that a password +is specified when required. + +The =script= element must contain the path of the Hive script to +execute. The Hive script can be templatized with variables of the form +=${VARIABLE}=. The values of these variables can then be specified +using the =params= element. + +The =params= element, if present, contains parameters to be passed to +the Hive script. + +The =argument= element, if present, contains arguments to be passed as-is to Beeline. + +All the above elements can be parameterized (templatized) using EL +expressions. + +*Example:* + +<verbatim> +<workflow-app name="sample-wf" xmlns="uri:oozie:workflow:0.4"> + ... + <action name="my-hive2-action"> + <hive2 xmlns="uri:oozie:hive2-action:0.1"> + <job-tracker>foo:8021</job-tracker> + <name-node>bar:8020</name-node> + <prepare> + <delete path="${jobOutput}"/> + </prepare> + <configuration> + <property> + <name>mapred.compress.map.output</name> + <value>true</value> + </property> + </configuration> + <jdbc-url>jdbc:hive2://localhost:10000/default</jdbc-url> + <password>foo</password> + <script>myscript.q</script> + <param>InputDir=/home/rkanter/input-data</param> + <param>OutputDir=${jobOutput}</param> + </hive2> + <ok to="my-other-action"/> + <error to="error-cleanup"/> + </action> + ... +</workflow-app> +</verbatim> + + +---+++ Security + +As mentioned above, =password= is only used in cases where Hive Server 2 is backed by something requiring a password (e.g. LDAP). +Non-secured Hive Server 2 and Kerberized Hive Server 2 don't require a password so in these cases it can be omitted. +See [[DG_UnifiedCredentialsModule][here]] for more information on the configuration for using the Hive Server 2 Action +with a Kerberized Hive Server 2. 
+ +---++ Appendix, Hive 2 XML-Schema + +---+++ AE.A Appendix A, Hive 2 XML-Schema + +---++++ Hive 2 Action Schema Version 0.1 +<verbatim> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" + xmlns:hive2="uri:oozie:hive2-action:0.1" elementFormDefault="qualified" + targetNamespace="uri:oozie:hive2-action:0.1"> +. + <xs:element name="hive2" type="hive2:ACTION"/> +. + <xs:complexType name="ACTION"> + <xs:sequence> + <xs:element name="job-tracker" type="xs:string" minOccurs="0" maxOccurs="1"/> + <xs:element name="name-node" type="xs:string" minOccurs="0" maxOccurs="1"/> + <xs:element name="prepare" type="hive2:PREPARE" minOccurs="0" maxOccurs="1"/> + <xs:element name="job-xml" type="xs:string" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="configuration" type="hive2:CONFIGURATION" minOccurs="0" maxOccurs="1"/> + <xs:element name="jdbc-url" type="xs:string" minOccurs="1" maxOccurs="1"/> + <xs:element name="password" type="xs:string" minOccurs="0" maxOccurs="1"/> + <xs:element name="script" type="xs:string" minOccurs="1" maxOccurs="1"/> + <xs:element name="param" type="xs:string" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="argument" type="xs:string" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="file" type="xs:string" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="archive" type="xs:string" minOccurs="0" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> +. + <xs:complexType name="CONFIGURATION"> + <xs:sequence> + <xs:element name="property" minOccurs="1" maxOccurs="unbounded"> + <xs:complexType> + <xs:sequence> + <xs:element name="name" minOccurs="1" maxOccurs="1" type="xs:string"/> + <xs:element name="value" minOccurs="1" maxOccurs="1" type="xs:string"/> + <xs:element name="description" minOccurs="0" maxOccurs="1" type="xs:string"/> + </xs:sequence> + </xs:complexType> + </xs:element> + </xs:sequence> + </xs:complexType> +. 
+ <xs:complexType name="PREPARE"> + <xs:sequence> + <xs:element name="delete" type="hive2:DELETE" minOccurs="0" maxOccurs="unbounded"/> + <xs:element name="mkdir" type="hive2:MKDIR" minOccurs="0" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> +. + <xs:complexType name="DELETE"> + <xs:attribute name="path" type="xs:string" use="required"/> + </xs:complexType> +. + <xs:complexType name="MKDIR"> + <xs:attribute name="path" type="xs:string" use="required"/> + </xs:complexType> +. +</xs:schema> +</verbatim> + +[[index][::Go back to Oozie Documentation Index::]] + +</noautolink> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/oozie/blob/e332299d/docs/src/site/twiki/DG_UnifiedCredentialsModule.twiki ---------------------------------------------------------------------- diff --git a/docs/src/site/twiki/DG_UnifiedCredentialsModule.twiki b/docs/src/site/twiki/DG_UnifiedCredentialsModule.twiki index 7c52970..8503fa2 100644 --- a/docs/src/site/twiki/DG_UnifiedCredentialsModule.twiki +++ b/docs/src/site/twiki/DG_UnifiedCredentialsModule.twiki @@ -154,6 +154,7 @@ Oozie currently comes with two Credentials implementations: 1. HCatalog and Hive Metastore: =org.apache.oozie.action.hadoop.HCatCredentials= 1. HBase: =org.apache.oozie.action.hadoop.HBaseCredentials= + 1. Hive Server 2: org.apache.oozie.action.hadoop.Hive2Credentials HCatCredentials requires these two properties: @@ -163,14 +164,20 @@ HCatCredentials requires these two properties: *Note:* The HCatalog Metastore and Hive Metastore are one and the same and so the "hcat" type credential can also be used with the Hive action to talk to a secure Hive Metastore. -To use either of these implementations, they must be set in the oozie.credentials.credentialclasses property as described previously +Hive2Credentials requires these two properties: + + 1. hive2.jdbc.url + 1. 
hive2.server.principal + +To use any of these implementations, they must be set in the oozie.credentials.credentialclasses property as described previously <verbatim> <property> <name>oozie.credentials.credentialclasses</name> <value> hcat=org.apache.oozie.action.hadoop.HCatCredentials, - hive=org.apache.oozie.action.hadoop.HbaseCredentials + hive=org.apache.oozie.action.hadoop.HbaseCredentials, + hive2=org.apache.oozie.action.hadoop.Hive2Credentials </value> </property> </verbatim> http://git-wip-us.apache.org/repos/asf/oozie/blob/e332299d/docs/src/site/twiki/index.twiki ---------------------------------------------------------------------- diff --git a/docs/src/site/twiki/index.twiki b/docs/src/site/twiki/index.twiki index f078bf5..765a6ba 100644 --- a/docs/src/site/twiki/index.twiki +++ b/docs/src/site/twiki/index.twiki @@ -56,6 +56,7 @@ Enough reading already? Follow the steps in [[DG_QuickStart][Oozie Quick Start]] * [[DG_EmailActionExtension][Email Action]] * [[DG_ShellActionExtension][Shell Action]] * [[DG_HiveActionExtension][Hive Action]] + * [[DG_Hive2ActionExtension][Hive 2 Action]] * [[DG_SqoopActionExtension][Sqoop Action]] * [[DG_SshActionExtension][Ssh Action]] * [[DG_DistCpActionExtension][DistCp Action]] http://git-wip-us.apache.org/repos/asf/oozie/blob/e332299d/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 0b19431..5c8f2c4 100644 --- a/pom.xml +++ b/pom.xml @@ -231,6 +231,11 @@ <version>${project.version}</version> </dependency> <dependency> + <groupId>org.apache.oozie</groupId> + <artifactId>oozie-sharelib-hive2</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> <groupId>org.apache.oozie</groupId> <artifactId>oozie-docs</artifactId> <version>${project.version}</version> @@ -488,6 +493,31 @@ </exclusion> </exclusions> </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-beeline</artifactId> + 
<version>${hive.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-jdbc</artifactId> + <version>${hive.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-metastore</artifactId> + <version>${hive.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-serde</artifactId> + <version>${hive.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-service</artifactId> + <version>${hive.version}</version> + </dependency> <dependency> <groupId>org.slf4j</groupId> http://git-wip-us.apache.org/repos/asf/oozie/blob/e332299d/release-log.txt ---------------------------------------------------------------------- diff --git a/release-log.txt b/release-log.txt index 8e7dd03..2991a6e 100644 --- a/release-log.txt +++ b/release-log.txt @@ -1,5 +1,6 @@ -- Oozie 4.2.0 release (trunk - unreleased) +OOZIE-1457 Create a Hive Server 2 action (rkanter) OOZIE-1950 Coordinator job info should support timestamp (nominal time) (shwethags) OOZIE-1813 Add service to report/kill rogue bundles and coordinator jobs (puru) OOZIE-1847 HA - Oozie servers should shutdown (or go in safe mode) in case of ZK failure (puru) http://git-wip-us.apache.org/repos/asf/oozie/blob/e332299d/sharelib/hive2/pom.xml ---------------------------------------------------------------------- diff --git a/sharelib/hive2/pom.xml b/sharelib/hive2/pom.xml new file mode 100644 index 0000000..e4c2465 --- /dev/null +++ b/sharelib/hive2/pom.xml @@ -0,0 +1,524 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.oozie</groupId> + <artifactId>oozie-main</artifactId> + <version>4.2.0-SNAPSHOT</version> + <relativePath>../..</relativePath> + </parent> + <groupId>org.apache.oozie</groupId> + <artifactId>oozie-sharelib-hive2</artifactId> + <version>4.2.0-SNAPSHOT</version> + <description>Apache Oozie Share Lib Hive 2</description> + <name>Apache Oozie Share Lib Hive 2</name> + <packaging>jar</packaging> + + <properties> + <sharelib.action.postfix>hive2</sharelib.action.postfix> + <sharelib.transitive.filtering>true</sharelib.transitive.filtering> + </properties> + + <dependencies> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-beeline</artifactId> + <scope>compile</scope> + <exclusions> + <exclusion> + <artifactId>commons-cli</artifactId> + <groupId>commons-cli</groupId> + </exclusion> + <exclusion> + <artifactId>commons-codec</artifactId> + <groupId>commons-codec</groupId> + </exclusion> + <exclusion> + <artifactId>commons-io</artifactId> + <groupId>commons-io</groupId> + </exclusion> + <exclusion> + <artifactId>commons-lang</artifactId> + <groupId>commons-lang</groupId> + </exclusion> + <exclusion> + 
<artifactId>commons-logging-api</artifactId> + <groupId>commons-logging</groupId> + </exclusion> + <exclusion> + <artifactId>guava</artifactId> + <groupId>com.google.guava</groupId> + </exclusion> + <exclusion> + <artifactId>hive-builtins</artifactId> + <groupId>org.apache.hive</groupId> + </exclusion> + <exclusion> + <artifactId>slf4j-api</artifactId> + <groupId>org.slf4j</groupId> + </exclusion> + <exclusion> + <artifactId>jackson-core-asl</artifactId> + <groupId>org.codehaus.jackson</groupId> + </exclusion> + <exclusion> + <artifactId>jackson-mapper-asl</artifactId> + <groupId>org.codehaus.jackson</groupId> + </exclusion> + <exclusion> + <artifactId>zookeeper</artifactId> + <groupId>org.apache.zookeeper</groupId> + </exclusion> + <exclusion> + <artifactId>commons-compress</artifactId> + <groupId>org.apache.commons</groupId> + </exclusion> + <exclusion> + <artifactId>hive-common</artifactId> + <groupId>org.apache.hive</groupId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-jdbc</artifactId> + <scope>compile</scope> + <exclusions> + <exclusion> + <artifactId>sqlline</artifactId> + <groupId>sqlline</groupId> + </exclusion> + <exclusion> + <artifactId>groovy-all</artifactId> + <groupId>org.codehaus.groovy</groupId> + </exclusion> + <exclusion> + <artifactId>ant</artifactId> + <groupId>org.apache.ant</groupId> + </exclusion> + <exclusion> + <artifactId>avro</artifactId> + <groupId>org.apache.avro</groupId> + </exclusion> + <exclusion> + <artifactId>avro-ipc</artifactId> + <groupId>org.apache.avro</groupId> + </exclusion> + <exclusion> + <artifactId>avro-mapred</artifactId> + <groupId>org.apache.avro</groupId> + </exclusion> + <exclusion> + <artifactId>commons-compress</artifactId> + <groupId>org.apache.commons</groupId> + </exclusion> + <exclusion> + <artifactId>hive-ant</artifactId> + <groupId>org.apache.hive</groupId> + </exclusion> + <exclusion> + <artifactId>parquet-hadoop-bundle</artifactId> 
+ <groupId>com.twitter</groupId> + </exclusion> + <exclusion> + <artifactId>stax-api</artifactId> + <groupId>stax</groupId> + </exclusion> + <exclusion> + <artifactId>zookeeper</artifactId> + <groupId>org.apache.zookeeper</groupId> + </exclusion> + <exclusion> + <artifactId>hive-common</artifactId> + <groupId>org.apache.hive</groupId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-metastore</artifactId> + <scope>compile</scope> + <exclusions> + <exclusion> + <artifactId>asm</artifactId> + <groupId>asm</groupId> + </exclusion> + <exclusion> + <artifactId>avro</artifactId> + <groupId>org.apache.avro</groupId> + </exclusion> + <exclusion> + <artifactId>avro-ipc</artifactId> + <groupId>org.apache.avro</groupId> + </exclusion> + <exclusion> + <artifactId>avro-mapred</artifactId> + <groupId>org.apache.avro</groupId> + </exclusion> + <exclusion> + <artifactId>commons-cli</artifactId> + <groupId>commons-cli</groupId> + </exclusion> + <exclusion> + <artifactId>commons-codec</artifactId> + <groupId>commons-codec</groupId> + </exclusion> + <exclusion> + <artifactId>commons-compress</artifactId> + <groupId>org.apache.commons</groupId> + </exclusion> + <exclusion> + <artifactId>commons-dbcp</artifactId> + <groupId>commons-dbcp</groupId> + </exclusion> + <exclusion> + <artifactId>commons-lang</artifactId> + <groupId>commons-lang</groupId> + </exclusion> + <exclusion> + <artifactId>commons-logging-api</artifactId> + <groupId>commons-logging</groupId> + </exclusion> + <exclusion> + <artifactId>commons-pool</artifactId> + <groupId>commons-pool</groupId> + </exclusion> + <exclusion> + <artifactId>derby</artifactId> + <groupId>org.apache.derby</groupId> + </exclusion> + <exclusion> + <artifactId>hive-common</artifactId> + <groupId>org.apache.hive</groupId> + </exclusion> + <exclusion> + <artifactId>hive-serde</artifactId> + <groupId>org.apache.hive</groupId> + </exclusion> + <exclusion> + 
<artifactId>log4j</artifactId> + <groupId>log4j</groupId> + </exclusion> + <exclusion> + <artifactId>slf4j-api</artifactId> + <groupId>org.slf4j</groupId> + </exclusion> + <exclusion> + <artifactId>slf4j-log4j12</artifactId> + <groupId>org.slf4j</groupId> + </exclusion> + <exclusion> + <artifactId>guava</artifactId> + <groupId>com.google.guava</groupId> + </exclusion> + <exclusion> + <artifactId>jetty</artifactId> + <groupId>org.mortbay.jetty</groupId> + </exclusion> + <exclusion> + <artifactId>zookeeper</artifactId> + <groupId>org.apache.zookeeper</groupId> + </exclusion> + <exclusion> + <artifactId>bonecp</artifactId> + <groupId>com.jolbox</groupId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-service</artifactId> + <scope>compile</scope> + <exclusions> + <exclusion> + <artifactId>JavaEWAH</artifactId> + <groupId>com.googlecode.javaewah</groupId> + </exclusion> + <exclusion> + <artifactId>avro</artifactId> + <groupId>org.apache.avro</groupId> + </exclusion> + <exclusion> + <artifactId>avro-mapred</artifactId> + <groupId>org.apache.avro</groupId> + </exclusion> + <exclusion> + <artifactId>commons-beanutils</artifactId> + <groupId>commons-beanutils</groupId> + </exclusion> + <exclusion> + <artifactId>commons-beanutils-core</artifactId> + <groupId>commons-beanutils</groupId> + </exclusion> + <exclusion> + <artifactId>commons-codec</artifactId> + <groupId>commons-codec</groupId> + </exclusion> + <exclusion> + <artifactId>commons-collections</artifactId> + <groupId>commons-collections</groupId> + </exclusion> + <exclusion> + <artifactId>commons-configuration</artifactId> + <groupId>commons-configuration</groupId> + </exclusion> + <exclusion> + <artifactId>commons-lang</artifactId> + <groupId>commons-lang</groupId> + </exclusion> + <exclusion> + <artifactId>commons-logging-api</artifactId> + <groupId>commons-logging</groupId> + </exclusion> + <exclusion> + <artifactId>guava</artifactId> + 
<groupId>com.google.guava</groupId> + </exclusion> + <exclusion> + <artifactId>haivvreo</artifactId> + <groupId>com.linkedin</groupId> + </exclusion> + <exclusion> + <artifactId>hive-contrib</artifactId> + <groupId>org.apache.hive</groupId> + </exclusion> + <exclusion> + <artifactId>hive-exec</artifactId> + <groupId>org.apache.hive</groupId> + </exclusion> + <exclusion> + <artifactId>jetty</artifactId> + <groupId>org.mortbay.jetty</groupId> + </exclusion> + <exclusion> + <artifactId>jetty-util</artifactId> + <groupId>org.mortbay.jetty</groupId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.oozie</groupId> + <artifactId>oozie-core</artifactId> + <scope>provided</scope> + <exclusions> + <exclusion> + <artifactId>guava</artifactId> + <groupId>com.google.guava</groupId> + </exclusion> + <exclusion> + <artifactId>jackson-core-asl</artifactId> + <groupId>org.codehaus.jackson</groupId> + </exclusion> + <exclusion> + <artifactId>jackson-mapper-asl</artifactId> + <groupId>org.codehaus.jackson</groupId> + </exclusion> + <exclusion> + <artifactId>jetty</artifactId> + <groupId>org.mortbay.jetty</groupId> + </exclusion> + <exclusion> + <artifactId>jetty-util</artifactId> + <groupId>org.mortbay.jetty</groupId> + </exclusion> + <exclusion> + <artifactId>servlet-api</artifactId> + <groupId>javax.servlet</groupId> + </exclusion> + <exclusion> + <artifactId>servlet-api-2.5</artifactId> + <groupId>org.mortbay.jetty</groupId> + </exclusion> + <exclusion> + <artifactId>zookeeper</artifactId> + <groupId>org.apache.zookeeper</groupId> + </exclusion> + <exclusion> + <artifactId>hive-common</artifactId> + <groupId>org.apache.hive</groupId> + </exclusion> + <exclusion> + <artifactId>derby</artifactId> + <groupId>org.apache.derby</groupId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.oozie</groupId> + <artifactId>oozie-hadoop</artifactId> + <scope>provided</scope> + <exclusions> + <exclusion> + 
<artifactId>guava</artifactId> + <groupId>com.google.guava</groupId> + </exclusion> + <exclusion> + <artifactId>avro</artifactId> + <groupId>org.apache.avro</groupId> + </exclusion> + <exclusion> + <artifactId>commons-compress</artifactId> + <groupId>org.apache.commons</groupId> + </exclusion> + <exclusion> + <artifactId>jackson-core-asl</artifactId> + <groupId>org.codehaus.jackson</groupId> + </exclusion> + <exclusion> + <artifactId>jackson-mapper-asl</artifactId> + <groupId>org.codehaus.jackson</groupId> + </exclusion> + <exclusion> + <artifactId>jetty-util</artifactId> + <groupId>org.mortbay.jetty</groupId> + </exclusion> + <exclusion> + <artifactId>jsr305</artifactId> + <groupId>com.google.code.findbugs</groupId> + </exclusion> + <exclusion> + <artifactId>servlet-api</artifactId> + <groupId>javax.servlet</groupId> + </exclusion> + <exclusion> + <artifactId>zookeeper</artifactId> + <groupId>org.apache.zookeeper</groupId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.oozie</groupId> + <artifactId>oozie-core</artifactId> + <classifier>tests</classifier> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.oozie</groupId> + <artifactId>oozie-hadoop-test</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.oozie</groupId> + <artifactId>oozie-hadoop-utils</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.oozie</groupId> + <artifactId>oozie-hcatalog</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> + </dependencies> + + <build> + <resources> + <resource> + <directory>src/main/resources</directory> + <filtering>true</filtering> + </resource> + </resources> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-dependency-plugin</artifactId> + <executions> + <execution> + 
<id>gen-classpath</id> + <phase>generate-test-resources</phase> + <goals> + <goal>build-classpath</goal> + </goals> + <configuration> + <includeScope>compile</includeScope> + <outputFile>${project.build.directory}/classpath</outputFile> + </configuration> + </execution> + <execution> + <id>create-mrapp-generated-classpath</id> + <phase>generate-test-resources</phase> + <goals> + <goal>build-classpath</goal> + </goals> + <configuration> + <!-- needed to run the unit test for DS to generate the required classpath + that is required in the env of the launch container in the mini mr/yarn cluster --> + <outputFile>${project.build.directory}/test-classes/mrapp-generated-classpath</outputFile> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-antrun-plugin</artifactId> + <executions> + <execution> + <configuration> + <target> + <!-- needed to include Main class in classpath for mini yarn cluster for unit tests --> + <echo file="${project.build.directory}/test-classes/mrapp-generated-classpath" + append="true" + message=":${project.build.directory}/classes"/> + </target> + </configuration> + <goals> + <goal>run</goal> + </goals> + <phase>generate-test-resources</phase> + </execution> + </executions> + </plugin> + <!--<plugin>--> + <!--<groupId>org.apache.maven.plugins</groupId>--> + <!--<artifactId>maven-deploy-plugin</artifactId>--> + <!--<configuration>--> + <!--<skip>true</skip>--> + <!--</configuration>--> + <!--</plugin>--> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-assembly-plugin</artifactId> + <configuration> + <finalName>partial-sharelib</finalName> + <appendAssemblyId>false</appendAssemblyId> + <descriptors> + <descriptor>../../src/main/assemblies/partial-sharelib.xml</descriptor> + </descriptors> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.rat</groupId> + <artifactId>apache-rat-plugin</artifactId> + <configuration> + 
<excludeSubProjects>false</excludeSubProjects> + <excludes> + <!-- excluding all as the root POM does the full check --> + <exclude>**</exclude> + </excludes> + </configuration> + </plugin> + </plugins> + </build> + +</project> http://git-wip-us.apache.org/repos/asf/oozie/blob/e332299d/sharelib/hive2/src/main/java/org/apache/oozie/action/hadoop/Hive2Main.java ---------------------------------------------------------------------- diff --git a/sharelib/hive2/src/main/java/org/apache/oozie/action/hadoop/Hive2Main.java b/sharelib/hive2/src/main/java/org/apache/oozie/action/hadoop/Hive2Main.java new file mode 100644 index 0000000..b3fc5d6 --- /dev/null +++ b/sharelib/hive2/src/main/java/org/apache/oozie/action/hadoop/Hive2Main.java @@ -0,0 +1,237 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package org.apache.oozie.action.hadoop;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hive.beeline.BeeLine;

/**
 * Launcher main class for the Oozie Hive 2 action.
 * <p>
 * Runs inside the launcher map task: it loads the action configuration prepared by Oozie,
 * assembles a Beeline command line (JDBC URL, user, password, driver, script, parameters and
 * any extra user arguments) and invokes {@link BeeLine#main(String[])} to execute the script
 * against HiveServer2.
 */
public class Hive2Main extends LauncherMain {

    // Beeline options that Oozie controls itself (connection URL, credentials, driver,
    // script, delegation-token auth) or that would break non-interactive execution.
    // User-supplied <argument>s matching one of these are rejected.
    private static final Set<String> DISALLOWED_BEELINE_OPTIONS = new HashSet<String>();

    static {
        DISALLOWED_BEELINE_OPTIONS.add("-u");
        DISALLOWED_BEELINE_OPTIONS.add("-n");
        DISALLOWED_BEELINE_OPTIONS.add("-p");
        DISALLOWED_BEELINE_OPTIONS.add("-d");
        DISALLOWED_BEELINE_OPTIONS.add("-e");
        DISALLOWED_BEELINE_OPTIONS.add("-f");
        DISALLOWED_BEELINE_OPTIONS.add("-a");
        DISALLOWED_BEELINE_OPTIONS.add("--help");
    }

    public static void main(String[] args) throws Exception {
        run(Hive2Main.class, args);
    }

    /**
     * Loads the action configuration prepared by Oozie from the file named by the
     * {@code oozie.action.conf.xml} system property, and propagates the delegation-token
     * location from the launcher environment into both the configuration and a system
     * property so the Hive job can pick it up.
     *
     * @return the populated action configuration
     * @throws RuntimeException if the system property is missing or the file does not exist
     */
    private static Configuration initActionConf() {
        // Loading action conf prepared by Oozie
        Configuration actionConf = new Configuration(false);

        String actionXml = System.getProperty("oozie.action.conf.xml");

        if (actionXml == null) {
            throw new RuntimeException("Missing Java System Property [oozie.action.conf.xml]");
        }
        if (!new File(actionXml).exists()) {
            throw new RuntimeException("Action Configuration XML file [" + actionXml + "] does not exist");
        } else {
            System.out.println("Using action configuration file " + actionXml);
        }

        actionConf.addResource(new Path("file:///", actionXml));

        // Propagate delegation related props from launcher job to Hive job
        String delegationToken = getFilePathFromEnv("HADOOP_TOKEN_FILE_LOCATION");
        if (delegationToken != null) {
            actionConf.set("mapreduce.job.credentials.binary", delegationToken);
            System.out.println("------------------------");
            System.out.println("Setting env property for mapreduce.job.credentials.binary to: " + delegationToken);
            System.out.println("------------------------");
            System.setProperty("mapreduce.job.credentials.binary", delegationToken);
        } else {
            System.out.println("Non-Kerberos execution");
        }

        // See https://issues.apache.org/jira/browse/HIVE-1411
        actionConf.set("datanucleus.plugin.pluginRegistryBundleCheck", "LOG");

        return actionConf;
    }

    /**
     * Builds the Beeline argument list from the action configuration and invokes Beeline.
     * Required configuration: the HiveServer2 JDBC URL and the script path; optional:
     * a password, {@code --hivevar} parameters and extra Beeline arguments.
     *
     * @param args launcher arguments (unused; everything comes from the action configuration)
     * @throws RuntimeException if a required property is missing, the script file does not
     *         exist, a parameter is not a {@code name=value} assignment, or a disallowed
     *         Beeline option is supplied
     */
    @Override
    protected void run(String[] args) throws Exception {
        System.out.println();
        System.out.println("Oozie Hive 2 action configuration");
        System.out.println("=================================================================");
        System.out.println();

        Configuration actionConf = initActionConf();

        List<String> arguments = new ArrayList<String>();
        String jdbcUrl = actionConf.get(Hive2ActionExecutor.HIVE2_JDBC_URL);
        if (jdbcUrl == null) {
            throw new RuntimeException("Action Configuration does not have [" + Hive2ActionExecutor.HIVE2_JDBC_URL
                    + "] property");
        }
        arguments.add("-u");
        arguments.add(jdbcUrl);

        // Use the user who is running the map task
        String username = actionConf.get("user.name");
        arguments.add("-n");
        arguments.add(username);

        String password = actionConf.get(Hive2ActionExecutor.HIVE2_PASSWORD);
        if (password == null) {
            // Have to pass something or Beeline might interactively prompt, which we don't want
            password = "DUMMY";
        }
        arguments.add("-p");
        arguments.add(password);

        // We always use the same driver
        arguments.add("-d");
        arguments.add("org.apache.hive.jdbc.HiveDriver");

        String scriptPath = actionConf.get(Hive2ActionExecutor.HIVE2_SCRIPT);
        if (scriptPath == null) {
            throw new RuntimeException("Action Configuration does not have [" + Hive2ActionExecutor.HIVE2_SCRIPT
                    + "] property");
        }
        if (!new File(scriptPath).exists()) {
            throw new RuntimeException("Hive 2 script file [" + scriptPath + "] does not exist");
        }

        // print out current directory & its contents
        File localDir = new File("dummy").getAbsoluteFile().getParentFile();
        System.out.println("Current (local) dir = " + localDir.getAbsolutePath());
        System.out.println("------------------------");
        // File.list() returns null if the path is not a directory or on I/O error; guard
        // against an NPE so a listing failure doesn't abort the action
        String[] localFiles = localDir.list();
        if (localFiles != null) {
            for (String file : localFiles) {
                System.out.println(" " + file);
            }
        }
        System.out.println("------------------------");
        System.out.println();

        // Prepare the Hive Script
        String script = readStringFromFile(scriptPath);
        System.out.println();
        System.out.println("Script [" + scriptPath + "] content: ");
        System.out.println("------------------------");
        System.out.println(script);
        System.out.println("------------------------");
        System.out.println();

        // Pass any parameters to Beeline via arguments
        String[] params = MapReduceMain.getStrings(actionConf, Hive2ActionExecutor.HIVE2_PARAMS);
        if (params.length > 0) {
            System.out.println("Parameters:");
            System.out.println("------------------------");
            for (String param : params) {
                System.out.println(" " + param);

                int idx = param.indexOf('=');
                if (idx == -1) {
                    throw new RuntimeException("Parameter expression must contain an assignment: " + param);
                } else if (idx == 0) {
                    throw new RuntimeException("Parameter value not specified: " + param);
                }
                arguments.add("--hivevar");
                arguments.add(param);
            }
            System.out.println("------------------------");
            System.out.println();
        }

        arguments.add("-f");
        arguments.add(scriptPath);

        // This tells BeeLine to look for a delegation token; otherwise it won't and will fail in secure mode because there are no
        // Kerberos credentials.  In non-secure mode, this argument is ignored so we can simply always pass it.
        arguments.add("-a");
        arguments.add("delegationToken");

        String[] beelineArgs = MapReduceMain.getStrings(actionConf, Hive2ActionExecutor.HIVE2_ARGS);
        for (String beelineArg : beelineArgs) {
            if (DISALLOWED_BEELINE_OPTIONS.contains(beelineArg)) {
                throw new RuntimeException("Error: Beeline argument " + beelineArg + " is not supported");
            }
            arguments.add(beelineArg);
        }

        System.out.println("Beeline command arguments :");
        for (String arg : arguments) {
            System.out.println("             " + arg);
        }
        System.out.println();

        LauncherMainHadoopUtils.killChildYarnJobs(actionConf);

        System.out.println("=================================================================");
        System.out.println();
        System.out.println(">>> Invoking Beeline command line now >>>");
        System.out.println();
        System.out.flush();

        try {
            runBeeline(arguments.toArray(new String[arguments.size()]));
        }
        catch (SecurityException ex) {
            // The LauncherSecurityManager converts System.exit() calls from Beeline into a
            // SecurityException; a zero exit code is success and is deliberately swallowed
            if (LauncherSecurityManager.getExitInvoked()) {
                if (LauncherSecurityManager.getExitCode() != 0) {
                    throw ex;
                }
            }
        }
        finally {
            System.out.println("\n<<< Invocation of Beeline command completed <<<\n");
        }
    }

    /**
     * Invokes Beeline with the assembled arguments.  Kept as a separate instance method so
     * subclasses/tests can intercept the actual invocation.
     *
     * @param args the complete Beeline command line
     * @throws Exception propagated from Beeline
     */
    private void runBeeline(String[] args) throws Exception {
        BeeLine.main(args);
    }

    /**
     * Reads a text file into a String, normalizing each line ending to the platform line
     * separator (used to echo the Hive script into the launcher log).
     *
     * @param filePath path of the file to read
     * @return the file content as a single String
     * @throws IOException if the file cannot be read
     */
    private static String readStringFromFile(String filePath) throws IOException {
        String line;
        BufferedReader br = null;
        try {
            br = new BufferedReader(new FileReader(filePath));
            StringBuilder sb = new StringBuilder();
            String sep = System.getProperty("line.separator");
            while ((line = br.readLine()) != null) {
                sb.append(line).append(sep);
            }
            return sb.toString();
        }
        finally {
            if (br != null) {
                br.close();
            }
        }
    }
}