Repository: oozie Updated Branches: refs/heads/master 9839fb292 -> c1cc9f41e
OOZIE-2343 Shell Action should take Oozie Action config and setup HADOOP_CONF_DIR (rkanter) Project: http://git-wip-us.apache.org/repos/asf/oozie/repo Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/c1cc9f41 Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/c1cc9f41 Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/c1cc9f41 Branch: refs/heads/master Commit: c1cc9f41e32f29663648d3d2e09d08cf3167ed4f Parents: 9839fb2 Author: Robert Kanter <[email protected]> Authored: Fri Aug 28 13:14:31 2015 -0700 Committer: Robert Kanter <[email protected]> Committed: Fri Aug 28 13:14:31 2015 -0700 ---------------------------------------------------------------------- .../action/hadoop/ShellActionExecutor.java | 9 ++++- core/src/main/resources/oozie-default.xml | 14 +++++++ .../oozie/action/hadoop/TestLauncher.java | 22 +++++++++++ .../action/hadoop/TestShellActionExecutor.java | 39 ++++++++++++++++++- release-log.txt | 1 + .../oozie/action/hadoop/LauncherMain.java | 37 ++++++++++++++++++ .../apache/oozie/action/hadoop/ShellMain.java | 41 +++++++++++++++++++- 7 files changed, 158 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/oozie/blob/c1cc9f41/core/src/main/java/org/apache/oozie/action/hadoop/ShellActionExecutor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/oozie/action/hadoop/ShellActionExecutor.java b/core/src/main/java/org/apache/oozie/action/hadoop/ShellActionExecutor.java index b1947e4..4fdd3ff 100644 --- a/core/src/main/java/org/apache/oozie/action/hadoop/ShellActionExecutor.java +++ b/core/src/main/java/org/apache/oozie/action/hadoop/ShellActionExecutor.java @@ -24,6 +24,7 @@ import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.oozie.action.ActionExecutorException; +import org.apache.oozie.service.ConfigurationService; 
import org.jdom.Element; import org.jdom.Namespace; @@ -67,8 +68,12 @@ public class ShellActionExecutor extends JavaActionExecutor { setListInConf("env-var", actionXml, actionConf, ShellMain.CONF_OOZIE_SHELL_ENVS, true); // Setting capture output flag - actionConf.setBoolean(ShellMain.CONF_OOZIE_SHELL_CAPTURE_OUTPUT, - actionXml.getChild("capture-output", ns) != null); + actionConf.setBoolean(ShellMain.CONF_OOZIE_SHELL_CAPTURE_OUTPUT, actionXml.getChild("capture-output", ns) != null); + + // Setting if ShellMain should setup HADOOP_CONF_DIR + boolean setupHadoopConfDir = actionConf.getBoolean(ShellMain.CONF_OOZIE_SHELL_SETUP_HADOOP_CONF_DIR, + ConfigurationService.getBoolean(ShellMain.CONF_OOZIE_SHELL_SETUP_HADOOP_CONF_DIR)); + actionConf.setBoolean(ShellMain.CONF_OOZIE_SHELL_SETUP_HADOOP_CONF_DIR, setupHadoopConfDir); return actionConf; } http://git-wip-us.apache.org/repos/asf/oozie/blob/c1cc9f41/core/src/main/resources/oozie-default.xml ---------------------------------------------------------------------- diff --git a/core/src/main/resources/oozie-default.xml b/core/src/main/resources/oozie-default.xml index d4a0536..b6e41a2 100644 --- a/core/src/main/resources/oozie-default.xml +++ b/core/src/main/resources/oozie-default.xml @@ -1731,6 +1731,20 @@ </property> <property> + <name>oozie.action.shell.setup.hadoop.conf.dir</name> + <value>false</value> + <description> + The Shell action is commonly used to run programs that rely on HADOOP_CONF_DIR (e.g. hive, beeline, sqoop, etc). With + YARN, HADOOP_CONF_DIR is set to the NodeManager's copies of Hadoop's *-site.xml files, which can be problematic because + (a) they are meant for the NM, not necessarily clients, and (b) they won't have any of the configs that Oozie, or + the user through Oozie, sets. When this property is set to true, the Shell action will prepare the *-site.xml files + based on the correct config and set HADOOP_CONF_DIR to point to it. 
Setting it to false will make Oozie leave + HADOOP_CONF_DIR alone. This can also be set at the Action level by putting it in the Shell Action's configuration + section, which also has priority. That all said, it's recommended to use the appropriate action type when possible. + </description> + </property> + + <property> <name>oozie.action.launcher.yarn.timeline-service.enabled</name> <value>false</value> <description> http://git-wip-us.apache.org/repos/asf/oozie/blob/c1cc9f41/core/src/test/java/org/apache/oozie/action/hadoop/TestLauncher.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/oozie/action/hadoop/TestLauncher.java b/core/src/test/java/org/apache/oozie/action/hadoop/TestLauncher.java index 31b5e91..7a044ed 100644 --- a/core/src/test/java/org/apache/oozie/action/hadoop/TestLauncher.java +++ b/core/src/test/java/org/apache/oozie/action/hadoop/TestLauncher.java @@ -18,6 +18,7 @@ package org.apache.oozie.action.hadoop; +import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.filecache.DistributedCache; import org.apache.hadoop.fs.FileSystem; @@ -32,6 +33,8 @@ import org.apache.oozie.service.HadoopAccessorService; import org.apache.oozie.service.Services; import java.io.File; +import java.io.FileWriter; +import java.io.Writer; import java.net.URI; import java.util.Map; @@ -370,4 +373,23 @@ public class TestLauncher extends XFsTestCase { assertEquals("aa.jar#aa.jar", actionConf.get("mapreduce.job.cache.files")); } + public void testCopyFileMultiplex() throws Exception { + String contents = "Hello World!\nThis is Oozie"; + File src = new File(getTestCaseDir(), "src.txt"); + Writer w = new FileWriter(src); + w.write(contents); + w.close(); + + File[] dsts = new File[]{new File("dst1.txt"), new File("dist2.txt"), new File("dist3.txt")}; + for (File dst : dsts) { + dst.delete(); + assertFalse(dst.exists()); + } + 
LauncherMain.copyFileMultiplex(src, dsts); + for (File dst : dsts) { + assertTrue(dst.exists()); + assertEquals(contents, FileUtils.readFileToString(dst)); + } + } + } http://git-wip-us.apache.org/repos/asf/oozie/blob/c1cc9f41/core/src/test/java/org/apache/oozie/action/hadoop/TestShellActionExecutor.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/oozie/action/hadoop/TestShellActionExecutor.java b/core/src/test/java/org/apache/oozie/action/hadoop/TestShellActionExecutor.java index 2775baa..ca1dee8 100644 --- a/core/src/test/java/org/apache/oozie/action/hadoop/TestShellActionExecutor.java +++ b/core/src/test/java/org/apache/oozie/action/hadoop/TestShellActionExecutor.java @@ -18,6 +18,7 @@ package org.apache.oozie.action.hadoop; +import java.io.File; import java.io.OutputStreamWriter; import java.io.Writer; import java.util.Map; @@ -41,6 +42,7 @@ import org.apache.oozie.util.PropertiesUtils; import org.apache.oozie.util.XConfiguration; import org.apache.oozie.util.XmlUtils; import org.jdom.Element; +import org.junit.Assert; public class TestShellActionExecutor extends ActionExecutorTestCase { @@ -57,6 +59,9 @@ public class TestShellActionExecutor extends ActionExecutorTestCase { ? "dir /s /b\necho %1 %2\nexit 1" : "ls -ltr\necho $1 $2\nexit 1"; private static final String PERL_SCRIPT_CONTENT = "print \"MY_VAR=TESTING\";"; + private static final String SHELL_SCRIPT_HADOOP_CONF_DIR_CONTENT = Shell.WINDOWS + ? 
"echo OOZIE_ACTION_CONF_XML=%OOZIE_ACTION_CONF_XML%\necho HADOOP_CONF_DIR=%HADOOP_CONF_DIR%" + : "echo OOZIE_ACTION_CONF_XML=$OOZIE_ACTION_CONF_XML\necho HADOOP_CONF_DIR=$HADOOP_CONF_DIR"; /** * Verify if the ShellActionExecutor indeed setups the basic stuffs @@ -86,6 +91,7 @@ public class TestShellActionExecutor extends ActionExecutorTestCase { assertEquals("2", conf.get("oozie.shell.args.size")); assertEquals("a=A", conf.get("oozie.shell.args.0")); assertEquals("b=B", conf.get("oozie.shell.args.1")); + assertEquals("false", conf.get("oozie.action.shell.setup.hadoop.conf.dir")); } /** @@ -112,6 +118,36 @@ public class TestShellActionExecutor extends ActionExecutorTestCase { } /** + * test if a sample shell script could run successfully + * + * @throws Exception + */ + public void testShellScriptHadoopConfDir() throws Exception { + FileSystem fs = getFileSystem(); + // Create the script file with canned shell command + Path script = new Path(getAppPath(), SHELL_SCRIPTNAME); + Writer w = new OutputStreamWriter(fs.create(script)); + w.write(SHELL_SCRIPT_HADOOP_CONF_DIR_CONTENT); + w.close(); + + // Create sample Shell action xml + String actionXml = "<shell>" + "<job-tracker>" + getJobTrackerUri() + "</job-tracker>" + "<name-node>" + + getNameNodeUri() + "</name-node>" + "<configuration>" + + "<property><name>oozie.action.shell.setup.hadoop.conf.dir</name><value>true</value></property>" + + "</configuration>" + "<exec>" + SHELL_EXEC + "</exec>" + "<argument>" + SHELL_PARAM + "</argument>" + + "<argument>" + SHELL_SCRIPTNAME + "</argument>" + "<file>" + script.toString() + + "#" + script.getName() + "</file>" + "<capture-output/>" + "</shell>"; + // Submit and verify the job's status + WorkflowAction action = _testSubmit(actionXml, true, ""); + String oozieActionConfXml = PropertiesUtils.stringToProperties(action.getData()).getProperty("OOZIE_ACTION_CONF_XML"); + String hadoopConfDir = 
PropertiesUtils.stringToProperties(action.getData()).getProperty("HADOOP_CONF_DIR"); + assertNotNull(oozieActionConfXml); + assertNotNull(hadoopConfDir); + String s = new File(oozieActionConfXml).getParent() + File.separator + "oozie-hadoop-conf-"; + Assert.assertTrue("Expected HADOOP_CONF_DIR to start with " + s + " but was " + hadoopConfDir, hadoopConfDir.startsWith(s)); + } + + /** * test if a sample shell script could run with error when the script return * non-zero exit code * @@ -213,7 +249,7 @@ public class TestShellActionExecutor extends ActionExecutorTestCase { * @param checkForSuccess * @throws Exception */ - private void _testSubmit(String actionXml, boolean checkForSuccess, String capture_output) throws Exception { + private WorkflowAction _testSubmit(String actionXml, boolean checkForSuccess, String capture_output) throws Exception { Context context = createContext(actionXml); final RunningJob launcherJob = submitAction(context);// Submit the @@ -258,6 +294,7 @@ public class TestShellActionExecutor extends ActionExecutorTestCase { else {// Negative test cases assertEquals(WorkflowAction.Status.ERROR, context.getAction().getStatus()); } + return context.getAction(); } /** http://git-wip-us.apache.org/repos/asf/oozie/blob/c1cc9f41/release-log.txt ---------------------------------------------------------------------- diff --git a/release-log.txt b/release-log.txt index a01e7c6..06ee6a8 100644 --- a/release-log.txt +++ b/release-log.txt @@ -1,5 +1,6 @@ -- Oozie 4.3.0 release (trunk - unreleased) +OOZIE-2343 Shell Action should take Oozie Action config and setup HADOOP_CONF_DIR (rkanter) OOZIE-2245 Service to periodically check database schema (rkanter) OOZIE-2332 Add ability to provide Hive and Hive 2 Action queries inline in workflows (prateekrungta via rkanter) OOZIE-2329 Make handling yarn restarts configurable (puru) http://git-wip-us.apache.org/repos/asf/oozie/blob/c1cc9f41/sharelib/oozie/src/main/java/org/apache/oozie/action/hadoop/LauncherMain.java 
---------------------------------------------------------------------- diff --git a/sharelib/oozie/src/main/java/org/apache/oozie/action/hadoop/LauncherMain.java b/sharelib/oozie/src/main/java/org/apache/oozie/action/hadoop/LauncherMain.java index 2288ed0..fb190d6 100644 --- a/sharelib/oozie/src/main/java/org/apache/oozie/action/hadoop/LauncherMain.java +++ b/sharelib/oozie/src/main/java/org/apache/oozie/action/hadoop/LauncherMain.java @@ -20,9 +20,11 @@ package org.apache.oozie.action.hadoop; import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; import java.io.StringWriter; import java.util.Collection; @@ -210,6 +212,41 @@ public abstract class LauncherMain { } } } + + /** + * Utility method that copies the contents of the src file into all of the dst file(s). + * It only requires reading the src file once. + * + * @param src The source file + * @param dst The destination file(s) + * @throws IOException + */ + protected static void copyFileMultiplex(File src, File... 
dst) throws IOException { + InputStream is = null; + OutputStream[] osa = new OutputStream[dst.length]; + try { + is = new FileInputStream(src); + for (int i = 0; i < osa.length; i++) { + osa[i] = new FileOutputStream(dst[i]); + } + byte[] buffer = new byte[4096]; + int read; + while ((read = is.read(buffer)) > -1) { + for (OutputStream os : osa) { + os.write(buffer, 0, read); + } + } + } finally { + if (is != null) { + is.close(); + } + for (OutputStream os : osa) { + if (os != null) { + os.close(); + } + } + } + } } class LauncherMainException extends Exception { http://git-wip-us.apache.org/repos/asf/oozie/blob/c1cc9f41/sharelib/oozie/src/main/java/org/apache/oozie/action/hadoop/ShellMain.java ---------------------------------------------------------------------- diff --git a/sharelib/oozie/src/main/java/org/apache/oozie/action/hadoop/ShellMain.java b/sharelib/oozie/src/main/java/org/apache/oozie/action/hadoop/ShellMain.java index 3f53915..553cf1b 100644 --- a/sharelib/oozie/src/main/java/org/apache/oozie/action/hadoop/ShellMain.java +++ b/sharelib/oozie/src/main/java/org/apache/oozie/action/hadoop/ShellMain.java @@ -26,12 +26,11 @@ import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; -import java.util.HashSet; +import java.util.Arrays; import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.Shell; public class ShellMain extends LauncherMain { @@ -39,7 +38,11 @@ public class ShellMain extends LauncherMain { public static final String CONF_OOZIE_SHELL_EXEC = "oozie.shell.exec"; public static final String CONF_OOZIE_SHELL_ENVS = "oozie.shell.envs"; public static final String CONF_OOZIE_SHELL_CAPTURE_OUTPUT = "oozie.shell.capture-output"; + public static final String CONF_OOZIE_SHELL_SETUP_HADOOP_CONF_DIR = "oozie.action.shell.setup.hadoop.conf.dir"; public static final String OOZIE_ACTION_CONF_XML = 
"OOZIE_ACTION_CONF_XML"; + private static final String HADOOP_CONF_DIR = "HADOOP_CONF_DIR"; + + private static String[] HADOOP_SITE_FILES = new String[] {"core-site.xml", "hdfs-site.xml", "mapred-site.xml", "yarn-site.xml"}; /** * @param args Invoked from LauncherMapper:map() @@ -81,6 +84,9 @@ public class ShellMain extends LauncherMain { System.out.println("Current working dir " + currDir); builder.directory(currDir); + // Setup Hadoop *-site files in case the user runs a Hadoop-type program (e.g. hive) + prepareHadoopConfigs(actionConf, envp, currDir); + printCommand(cmdArray, envp); // For debugging purpose System.out.println("================================================================="); @@ -110,6 +116,37 @@ public class ShellMain extends LauncherMain { } /** + * This method takes the OOZIE_ACTION_CONF_XML and copies it to Hadoop *-site files in a new directory; it then sets the + * HADOOP_CONF_DIR to point there. This should allow most Hadoop ecosystem CLI programs to have the proper configuration, + * propagated from Oozie's copy and including anything set in the Workflow's configuration section as well. Otherwise, + * HADOOP_CONF_DIR points to the NodeManager's *-site files, which are likely not suitable for client programs. + * It will only do this if {@link CONF_OOZIE_SHELL_SETUP_HADOOP_CONF_DIR} is set to true. 
+ * + * @param actionConf The action configuration + * @param envp The environment for the Shell process + * @param currDir The current working dir + * @throws IOException + */ + private void prepareHadoopConfigs(Configuration actionConf, Map<String, String> envp, File currDir) throws IOException { + if (actionConf.getBoolean(CONF_OOZIE_SHELL_SETUP_HADOOP_CONF_DIR, false)) { + String actionXml = envp.get(OOZIE_ACTION_CONF_XML); + if (actionXml != null) { + File actionXmlFile = new File(actionXml); + File confDir = new File(currDir, "oozie-hadoop-conf-" + System.currentTimeMillis()); + System.out.println("Copying " + actionXml + " to " + confDir + "/" + Arrays.toString(HADOOP_SITE_FILES)); + confDir.mkdirs(); + File[] dstFiles = new File[HADOOP_SITE_FILES.length]; + for (int i = 0; i < dstFiles.length; i++) { + dstFiles[i] = new File(confDir, HADOOP_SITE_FILES[i]); + } + copyFileMultiplex(actionXmlFile, dstFiles); + System.out.println("Setting " + HADOOP_CONF_DIR + " to " + confDir.getAbsolutePath()); + envp.put(HADOOP_CONF_DIR, confDir.getAbsolutePath()); + } + } + } + + /** * Return the environment variable to pass to in shell command execution. * */
