Repository: giraph Updated Branches: refs/heads/release-1.1 065d718d7 -> 4c139ee36 (forced update)
GIRAPH-842: option to dump histogram of memory usage when heap is low on memory (pavanka) Project: http://git-wip-us.apache.org/repos/asf/giraph/repo Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/372c35eb Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/372c35eb Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/372c35eb Branch: refs/heads/release-1.1 Commit: 372c35ebccedf64266a38a35e365d3810642fef2 Parents: 2e8c2c6 Author: Pavan Kumar <[email protected]> Authored: Tue Jun 10 12:12:43 2014 -0700 Committer: Pavan Kumar <[email protected]> Committed: Tue Jun 10 12:13:30 2014 -0700 ---------------------------------------------------------------------- CHANGELOG | 2 + .../apache/giraph/conf/GiraphConfiguration.java | 9 ++ .../org/apache/giraph/conf/GiraphConstants.java | 16 +++ .../apache/giraph/master/BspServiceMaster.java | 4 + .../giraph/utils/ReactiveJMapHistoDumper.java | 120 +++++++++++++++++++ .../apache/giraph/worker/BspServiceWorker.java | 4 + 6 files changed, 155 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/giraph/blob/372c35eb/CHANGELOG ---------------------------------------------------------------------- diff --git a/CHANGELOG b/CHANGELOG index bdc0c10..ec1f798 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ Giraph Change Log Release 1.1.0 - unreleased + GIRAPH-842: option to dump histogram of memory usage when heap is low on memory (pavanka) + GIRAPH-904: Giraph can hang when hostnames include uppercase letters (netj via pavanka) GIRAPH-900: Remove timers for compute method (majakabiljo via pavanka) http://git-wip-us.apache.org/repos/asf/giraph/blob/372c35eb/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java index d764307..b6384dc 100644 --- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java +++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java @@ -335,6 +335,15 @@ public class GiraphConfiguration extends Configuration } /** + * Check whether to enable heap memory supervisor thread + * + * @return true if jmap dumper is reactively enabled + */ + public boolean isReactiveJmapHistogramDumpEnabled() { + return REACTIVE_JMAP_ENABLE.get(this); + } + + /** * Set mapping from a key name to a list of classes. * * @param name String key name to use. http://git-wip-us.apache.org/repos/asf/giraph/blob/372c35eb/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java index dd0c9ae..7d7ceb2 100644 --- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java +++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java @@ -443,6 +443,22 @@ public interface GiraphConstants { "Only print live objects in jmap?"); /** + * Option used by ReactiveJMapHistoDumper to check for an imminent + * OOM in worker or master process + */ + IntConfOption MIN_FREE_MBS_ON_HEAP = + new IntConfOption("giraph.heap.minFreeMb", 128, "Option used by " + + "worker and master observers to check for imminent OOM exception"); + /** + * Option can be used to enable reactively dumping jmap histo when + * OOM is imminent + */ + BooleanConfOption REACTIVE_JMAP_ENABLE = + new BooleanConfOption("giraph.heap.enableReactiveJmapDumping", false, + "Option to enable dumping jmap histogram reactively based on " + + "free memory on heap"); + + /** * Minimum percent of the maximum number of workers that have responded * in order to continue progressing. (float) */ http://git-wip-us.apache.org/repos/asf/giraph/blob/372c35eb/giraph-core/src/main/java/org/apache/giraph/master/BspServiceMaster.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/master/BspServiceMaster.java b/giraph-core/src/main/java/org/apache/giraph/master/BspServiceMaster.java index e367b94..ad7e045 100644 --- a/giraph-core/src/main/java/org/apache/giraph/master/BspServiceMaster.java +++ b/giraph-core/src/main/java/org/apache/giraph/master/BspServiceMaster.java @@ -54,6 +54,7 @@ import org.apache.giraph.metrics.ResetSuperstepMetricsObserver; import org.apache.giraph.metrics.SuperstepMetricsRegistry; import org.apache.giraph.metrics.WorkerSuperstepMetrics; import org.apache.giraph.utils.JMapHistoDumper; +import org.apache.giraph.utils.ReactiveJMapHistoDumper; import org.apache.giraph.utils.ProgressableUtils; import org.apache.giraph.time.SystemTime; import org.apache.giraph.time.Time; @@ -215,6 +216,9 @@ public class BspServiceMaster<I extends WritableComparable, if (conf.isJMapHistogramDumpEnabled()) { conf.addMasterObserverClass(JMapHistoDumper.class); } + if (conf.isReactiveJmapHistogramDumpEnabled()) { + conf.addMasterObserverClass(ReactiveJMapHistoDumper.class); + } observers = conf.createMasterObservers(); GiraphMetrics.get().addSuperstepResetObserver(this); http://git-wip-us.apache.org/repos/asf/giraph/blob/372c35eb/giraph-core/src/main/java/org/apache/giraph/utils/ReactiveJMapHistoDumper.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/utils/ReactiveJMapHistoDumper.java b/giraph-core/src/main/java/org/apache/giraph/utils/ReactiveJMapHistoDumper.java new file mode 100644 index 0000000..68369d9 --- /dev/null +++ b/giraph-core/src/main/java/org/apache/giraph/utils/ReactiveJMapHistoDumper.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.giraph.utils; + +import org.apache.giraph.conf.DefaultImmutableClassesGiraphConfigurable; +import org.apache.giraph.conf.GiraphConstants; +import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; +import org.apache.giraph.master.MasterObserver; +import org.apache.giraph.worker.WorkerObserver; +import org.apache.log4j.Logger; + +/** + * An observer for both worker and master that periodically checks if available + * memory on heap is below certain threshold, and if found to be the case + * dumps jmap -histo for the process + */ +public class ReactiveJMapHistoDumper extends + DefaultImmutableClassesGiraphConfigurable implements + MasterObserver, WorkerObserver { + /** Logger */ + private static final Logger LOG = Logger.getLogger( + ReactiveJMapHistoDumper.class); + /** Size of mb */ + private static final int MB = 1024 * 1024; + + /** How many msec to sleep between calls */ + private int sleepMillis; + /** How many lines of output to print */ + private int linesToPrint; + /** How much free memory is expected */ + private int minFreeMemory; + + /** The jmap printing thread */ + private Thread thread; + /** Halt jmap thread */ + private volatile boolean stop = false; + + @Override + public void preApplication() { + // This is called by both WorkerObserver and MasterObserver + startSupervisorThread(); + } + + @Override + public void postApplication() { + // This is called by both WorkerObserver and MasterObserver + joinSupervisorThread(); + } + + /** + * Join the supervisor thread + */ + private void joinSupervisorThread() { + stop = true; + try { + thread.join(sleepMillis + 5000); + } catch (InterruptedException e) { + LOG.error("Failed to join jmap thread"); + } + } + + /** + * Start the supervisor thread + */ + public void startSupervisorThread() { + stop = false; + final Runtime runtime = Runtime.getRuntime(); + thread = new Thread(new Runnable() { + @Override + public void run() { + try { + while (!stop) { + long potentialMemory = (runtime.maxMemory() - + runtime.totalMemory()) + runtime.freeMemory(); + if (potentialMemory / MB < minFreeMemory) { + JMap.heapHistogramDump(linesToPrint); + } + Thread.sleep(sleepMillis); + } + } catch (InterruptedException e) { + LOG.warn("JMap histogram sleep interrupted", e); + } + } + }); + thread.setName("ReactiveJMapHistoDumperSupervisorThread"); + thread.start(); + } + + @Override + public void preSuperstep(long superstep) { } + + @Override + public void postSuperstep(long superstep) { } + + @Override + public void applicationFailed(Exception e) { } + + @Override + public void setConf(ImmutableClassesGiraphConfiguration configuration) { + sleepMillis = GiraphConstants.JMAP_SLEEP_MILLIS.get(configuration); + linesToPrint = GiraphConstants.JMAP_PRINT_LINES.get(configuration); + minFreeMemory = GiraphConstants.MIN_FREE_MBS_ON_HEAP.get(configuration); + } +} http://git-wip-us.apache.org/repos/asf/giraph/blob/372c35eb/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java b/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java index 104932c..a89da24 100644 --- a/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java +++ b/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java @@ -63,6 +63,7 @@ import org.apache.giraph.partition.PartitionStore; import org.apache.giraph.partition.WorkerGraphPartitioner; import org.apache.giraph.utils.CallableFactory; import org.apache.giraph.utils.JMapHistoDumper; +import org.apache.giraph.utils.ReactiveJMapHistoDumper; import org.apache.giraph.utils.LoggerUtils; import org.apache.giraph.utils.MemoryUtils; import org.apache.giraph.utils.ProgressableUtils; @@ -214,6 +215,9 @@ public class BspServiceWorker<I extends WritableComparable, if (conf.isJMapHistogramDumpEnabled()) { conf.addWorkerObserverClass(JMapHistoDumper.class); } + if (conf.isReactiveJmapHistogramDumpEnabled()) { + conf.addWorkerObserverClass(ReactiveJMapHistoDumper.class); + } observers = conf.createWorkerObservers(); WorkerProgress.get().setTaskId(getTaskPartition());
