Author: cutting Date: Thu Feb 23 15:36:08 2006 New Revision: 380272 URL: http://svn.apache.org/viewcvs?rev=380272&view=rev Log: Fix for HADOOP-41. Support passing more options to child JVM. Contributed by Michael Stack.
Modified: lucene/hadoop/trunk/conf/hadoop-default.xml lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java Modified: lucene/hadoop/trunk/conf/hadoop-default.xml URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/conf/hadoop-default.xml?rev=380272&r1=380271&r2=380272&view=diff ============================================================================== --- lucene/hadoop/trunk/conf/hadoop-default.xml (original) +++ lucene/hadoop/trunk/conf/hadoop-default.xml Thu Feb 23 15:36:08 2006 @@ -190,10 +190,21 @@ </property> <property> - <name>mapred.child.heap.size</name> - <value>200m</value> - <description>The heap size (-Xmx) that will be used for task tracker - child processes.</description> + <name>mapred.child.java.opts</name> + <value>-Xmx200m</value> + <description>Java opts for the task tracker child processes. Subsumes + 'mapred.child.heap.size' (If a mapred.child.heap.size value is found + in a configuration, its maximum heap size will be used and a warning + emitted that heap.size has been deprecated). Also, the following symbols, + if present, will be interpolated: @taskid@ is replaced by current TaskID; + and @port@ will be replaced by mapred.task.tracker.report.port + 1 (A second + child will fail with a port-in-use if mapred.tasktracker.tasks.maximum is + greater than one). Any other occurrences of '@' will go unchanged. 
For + example, to enable verbose gc logging to a file named for the taskid in + /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of: + + -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc + </description> </property> <property> Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java?rev=380272&r1=380271&r2=380272&view=diff ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java Thu Feb 23 15:36:08 2006 @@ -21,6 +21,7 @@ import java.io.*; import java.util.logging.*; +import java.util.Vector; /** Base class that runs a task in a separate process. Tasks are run in a * separate process in order to isolate the map/reduce system code from bugs in @@ -90,20 +91,56 @@ classPath.append(workDir); } + // Build exec child jvm args. + Vector vargs = new Vector(8); File jvm = // use same jvm as parent new File(new File(System.getProperty("java.home"), "bin"), "java"); - - // run java - runChild(new String[] { - jvm.toString(), - //"-Xrunhprof:cpu=samples,file="+t.getTaskId()+".prof", - "-Xmx"+job.get("mapred.child.heap.size", "200m"), - "-cp", classPath.toString(), - TaskTracker.Child.class.getName(), // main is Child - tracker.taskReportPort+"", // pass umbilical port - t.getTaskId() // pass task identifier - }, workDir); + vargs.add(jvm.toString()); + + // Add child java opts. Also, mapred.child.heap.size has been superseded + // by mapred.child.java.opts. Manage case where both are present + // letting the mapred.child.heap.size win over any setting of heap size in + // mapred.child.java.opts (Emit a warning that heap.size is deprecated). 
+ // + // The following symbols if present in mapred.child.java.opts value are + // replaced: + // + @taskid@ is interpolated with value of TaskID. + // + Replaces @port@ with mapred.task.tracker.report.port + 1. + // Other occurrences of @ will not be altered. + // + // Example with multiple arguments and substitutions, showing + // jvm GC logging, and start of a passwordless JVM JMX agent so one can + // connect with jconsole and the like to watch child memory, threads + // and get thread dumps. + // + // <name>mapred.child.java.opts</name> + // <value>-verbose:gc -Xloggc:/tmp/@taskid@.gc \ + // -Dcom.sun.management.jmxremote.authenticate=false \ + // -Dcom.sun.management.jmxremote.ssl=false \ + // -Dcom.sun.management.jmxremote.port=@port@ + // </value> + // + String javaOpts = handleDeprecatedHeapSize( + job.get("mapred.child.java.opts", "-Xmx200m"), + job.get("mapred.child.heap.size")); + javaOpts = replaceAll(javaOpts, "@taskid@", t.getTaskId()); + int port = job.getInt("mapred.task.tracker.report.port", 50050) + 1; + javaOpts = replaceAll(javaOpts, "@port@", Integer.toString(port)); + String [] javaOptsSplit = javaOpts.split(" "); + for (int i = 0; i < javaOptsSplit.length; i++) { + vargs.add(javaOptsSplit[i]); + } + + // Add classpath. + vargs.add("-classpath"); + vargs.add(classPath.toString()); + // Add main class and its arguments + vargs.add(TaskTracker.Child.class.getName()); // main of Child + vargs.add(tracker.taskReportPort + ""); // pass umbilical port + vargs.add(t.getTaskId()); // pass task identifier + // Run java + runChild((String[])vargs.toArray(new String[0]), workDir); } catch (FSError e) { LOG.log(Level.SEVERE, "FSError", e); try { @@ -123,6 +160,65 @@ } finally { tracker.reportTaskFinished(t.getTaskId()); } + } + + /** + * Handle deprecated mapred.child.heap.size. + * If present, interpolate into mapred.child.java.opts value with + * warning. + * @param javaOpts Value of mapred.child.java.opts property. 
+ * @param heapSize Value of mapred.child.heap.size property. + * @return A <code>javaOpts</code> with <code>heapSize</code> + * interpolated if present. + */ + private String handleDeprecatedHeapSize(String javaOpts, + final String heapSize) { + if (heapSize == null || heapSize.length() <= 0) { + return javaOpts; + } + final String MX = "-Xmx"; + int index = javaOpts.indexOf(MX); + if (index < 0) { + javaOpts = javaOpts + " " + MX + heapSize; + } else { + int end = javaOpts.indexOf(" ", index + MX.length()); + javaOpts = javaOpts.substring(0, index + MX.length()) + + heapSize + ((end < 0)? "": javaOpts.substring(end)); + } + LOG.warning("mapred.child.heap.size is deprecated. Use " + + "mapred.child.heap.size instead. Meantime, interpolated " + + "child.heap.size into child.java.opt: " + javaOpts); + return javaOpts; + } + + /** + * Replace <code>toFind</code> with <code>replacement</code>. + * When hadoop moves to JDK1.5, replace this method with + * String#replace (Or, if commons-lang is available, replace with + * StringUtils#replace). + * @param text String to do replacements in. + * @param toFind String to find. + * @param replacement String to replace <code>toFind</code> with. + * @return A String with all instances of <code>toFind</code> + * replaced by <code>replacement</code> (The original + * <code>text</code> is returned if <code>toFind</code> is not + * found in <code>text</code>). + */ + private static String replaceAll(String text, final String toFind, + final String replacement) { + if (text == null || toFind == null || replacement == null) { + throw new IllegalArgumentException("Text " + text + " or toFind " + + toFind + " or replacement " + replacement + " are null."); + } + int offset = 0; + for (int index = text.indexOf(toFind); index >= 0; + index = text.indexOf(toFind, offset)) { + offset = index + toFind.length(); + text = text.substring(0, index) + replacement + + text.substring(offset); + + } + return text; } /**