Author: knoguchi Date: Thu Oct 10 20:44:04 2024 New Revision: 1921242 URL: http://svn.apache.org/viewvc?rev=1921242&view=rev Log: PIG-5454: Make ParallelGC the default Garbage Collection (knoguchi)
Added: pig/trunk/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/DumpJVMArgsUDF.java Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/PigConfiguration.java pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/Launcher.java pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/TezLauncher.java pig/trunk/test/e2e/pig/tests/cmdline.conf Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1921242&r1=1921241&r2=1921242&view=diff ============================================================================== --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Thu Oct 10 20:44:04 2024 @@ -23,6 +23,8 @@ Trunk (unreleased changes) INCOMPATIBLE CHANGES IMPROVEMENTS +PIG-5454: Make ParallelGC the default Garbage Collection (knoguchi) + PIG-5438: Update SparkCounter.Accumulator to AccumulatorV2 (knoguchi) PIG-5439: Support Spark 3 and drop SparkShim (knoguchi) Modified: pig/trunk/src/org/apache/pig/PigConfiguration.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/PigConfiguration.java?rev=1921242&r1=1921241&r2=1921242&view=diff ============================================================================== --- pig/trunk/src/org/apache/pig/PigConfiguration.java (original) +++ pig/trunk/src/org/apache/pig/PigConfiguration.java Thu Oct 10 20:44:04 2024 @@ -527,4 +527,13 @@ public class PigConfiguration { public static final String PIG_PRINT_EXEC_PLAN = "pig.print.exec.plan"; + /** + * If GC is not explicitly set, default to parallelGC + */ + public static final String PIG_GC_PARAMS = "pig.gc.default.params"; + + public static final String PIG_GC_PARAMS_DEFAULT = "-XX:+UseParallelGC"; + + public static final String PIG_GC_PATTERN = "-XX:\\+Use[a-zA-Z0-9_]+GC"; + } Modified: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/Launcher.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/Launcher.java?rev=1921242&r1=1921241&r2=1921242&view=diff ============================================================================== --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/Launcher.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/Launcher.java Thu Oct 10 20:44:04 2024 @@ -25,6 +25,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -37,6 +38,7 @@ import org.apache.hadoop.mapreduce.TaskR import org.apache.hadoop.mapred.jobcontrol.Job; import org.apache.hadoop.mapred.jobcontrol.JobControl; import org.apache.pig.FuncSpec; +import org.apache.pig.PigConfiguration; import org.apache.pig.PigException; import org.apache.pig.backend.BackendException; import org.apache.pig.backend.executionengine.ExecException; @@ -661,4 +663,20 @@ public abstract class Launcher { public void destroy() { } + + protected void addGCParams(Properties properties, String key, boolean skipIfEmpty) { + String value = properties.getProperty(key); + if( value == null ) { + if( skipIfEmpty ) { + return; + } + value = properties.getProperty(PigConfiguration.PIG_GC_PARAMS, PigConfiguration.PIG_GC_PARAMS_DEFAULT); + properties.setProperty(key, value); + } else if (!value.matches(PigConfiguration.PIG_GC_PATTERN)) { + value += " " + + properties.getProperty(PigConfiguration.PIG_GC_PARAMS, PigConfiguration.PIG_GC_PARAMS_DEFAULT); + properties.setProperty(key, value); + } + } + } Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java?rev=1921242&r1=1921241&r2=1921242&view=diff ============================================================================== --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java Thu Oct 10 20:44:04 2024 @@ -172,6 +172,10 @@ public class MapReduceLauncher extends L aggregateWarning = Boolean.valueOf(pc.getProperties().getProperty("aggregate.warning")); MROperPlan mrp = compile(php, pc); + addGCParams(pc.getProperties(), JobConf.MAPRED_TASK_JAVA_OPTS, false); + addGCParams(pc.getProperties(), JobConf.MAPRED_MAP_TASK_JAVA_OPTS, true); + addGCParams(pc.getProperties(), JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, true); + ConfigurationValidator.validatePigProperties(pc.getProperties()); Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties()); Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java?rev=1921242&r1=1921241&r2=1921242&view=diff ============================================================================== --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java Thu Oct 10 20:44:04 2024 @@ -177,6 +177,10 @@ public class SparkLauncher extends Launc if (LOG.isDebugEnabled()) { LOG.debug(sparkplan); } + + addGCParams(pigContext.getProperties(), + org.apache.spark.launcher.SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS, false); + SparkPigStats sparkStats = (SparkPigStats) pigContext .getExecutionEngine().instantiatePigStats(); sparkStats.initialize(pigContext, sparkplan, jobConf); 
Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/TezLauncher.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/TezLauncher.java?rev=1921242&r1=1921241&r2=1921242&view=diff ============================================================================== --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/TezLauncher.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/TezLauncher.java Thu Oct 10 20:44:04 2024 @@ -35,6 +35,7 @@ import java.util.concurrent.ThreadFactor import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.counters.Limits; import org.apache.hadoop.util.StringUtils; @@ -119,6 +120,12 @@ public class TezLauncher extends Launche pc.getProperties().setProperty(TezConfiguration.TEZ_IGNORE_LIB_URIS, "true"); pc.getProperties().setProperty(TezConfiguration.TEZ_AM_DAG_SCHEDULER_CLASS, DAGSchedulerNaturalOrderControlled.class.getName()); } + + addGCParams(pc.getProperties(), JobConf.MAPRED_TASK_JAVA_OPTS, false); + addGCParams(pc.getProperties(), JobConf.MAPRED_MAP_TASK_JAVA_OPTS, true); + addGCParams(pc.getProperties(), JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, true); + addGCParams(pc.getProperties(), TezConfiguration.TEZ_TASK_LAUNCH_CMD_OPTS, true); + Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties(), true); // Make sure MR counter does not exceed limit if (conf.get(TezConfiguration.TEZ_COUNTERS_MAX) != null) { Modified: pig/trunk/test/e2e/pig/tests/cmdline.conf URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/cmdline.conf?rev=1921242&r1=1921241&r2=1921242&view=diff ============================================================================== --- pig/trunk/test/e2e/pig/tests/cmdline.conf (original) +++ 
pig/trunk/test/e2e/pig/tests/cmdline.conf Thu Oct 10 20:44:04 2024 @@ -364,7 +364,37 @@ describe C::D; }, ] - } - ] + }, + { + 'name' => 'GC_params', + 'tests' => [ + { + 'num' => 1, + 'pig' => q\ +register :FUNCPATH:/testudf.jar; +A = load ':INPATH:/singlefile/fileexists' as (name:chararray); +B = FOREACH A GENERATE org.apache.pig.test.udf.evalfunc.DumpJVMArgsUDF(name); +dump B; +\, +#if no gc param is passed, parallelGC is specified (PIG-5454) by pig + ,'expected_out_regex' => "-XX:\\+UseParallelGC" + }, + { + 'num' => 2, + 'pig' => q\ +set spark.executor.extraJavaOptions '-XX:+UseG1GC'; +set mapreduce.map.java.opts '-XX:+UseG1GC'; +register :FUNCPATH:/testudf.jar; +A = load ':INPATH:/singlefile/fileexists' as (name:chararray); +B = FOREACH A GENERATE org.apache.pig.test.udf.evalfunc.DumpJVMArgsUDF(name); +dump B; +\, +#Making sure user's G1GC is still set. +#If parallelGC is also set, jvm will fail to start + ,'expected_out_regex' => "-XX:\\+UseG1GC" + }, + ], + } + ] } ; Added: pig/trunk/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/DumpJVMArgsUDF.java URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/DumpJVMArgsUDF.java?rev=1921242&view=auto ============================================================================== --- pig/trunk/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/DumpJVMArgsUDF.java (added) +++ pig/trunk/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/DumpJVMArgsUDF.java Thu Oct 10 20:44:04 2024 @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.test.udf.evalfunc; + +import java.io.IOException; +import java.util.List; + +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; + +import org.apache.pig.EvalFunc; +import org.apache.pig.data.Tuple; + +public class DumpJVMArgsUDF extends EvalFunc<String> { + + + @Override + public String exec(Tuple input) throws IOException { + // After java9, we can instead use ProcessHandle to achieve this + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + List<String> jvmArgs = bean.getInputArguments(); + StringBuffer sb = new StringBuffer(); + for( String arg : jvmArgs ) { + System.err.println(arg); + sb.append(arg); + sb.append(" "); + } + return sb.toString(); + } +}