Repository: spark
Updated Branches:
  refs/heads/branch-1.0 a74fbbbca -> 9ae80bf9b


http://git-wip-us.apache.org/repos/asf/spark/blob/9ae80bf9/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
index 0342a8a..f75297a 100644
--- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.util
 
-import java.util.{Properties, UUID}
+import java.util.Properties
 
 import scala.collection.Map
 
@@ -52,6 +52,8 @@ class JsonProtocolSuite extends FunSuite {
     val blockManagerRemoved = SparkListenerBlockManagerRemoved(
       BlockManagerId("Scarce", "to be counted...", 100, 200))
     val unpersistRdd = SparkListenerUnpersistRDD(12345)
+    val applicationStart = SparkListenerApplicationStart("The winner of all", 42L, "Garfield")
+    val applicationEnd = SparkListenerApplicationEnd(42L)
 
     testEvent(stageSubmitted, stageSubmittedJsonString)
     testEvent(stageCompleted, stageCompletedJsonString)
@@ -64,6 +66,8 @@ class JsonProtocolSuite extends FunSuite {
     testEvent(blockManagerAdded, blockManagerAddedJsonString)
     testEvent(blockManagerRemoved, blockManagerRemovedJsonString)
     testEvent(unpersistRdd, unpersistRDDJsonString)
+    testEvent(applicationStart, applicationStartJsonString)
+    testEvent(applicationEnd, applicationEndJsonString)
   }
 
   test("Dependent Classes") {
@@ -208,7 +212,13 @@ class JsonProtocolSuite extends FunSuite {
       case (e1: SparkListenerBlockManagerRemoved, e2: SparkListenerBlockManagerRemoved) =>
         assertEquals(e1.blockManagerId, e2.blockManagerId)
       case (e1: SparkListenerUnpersistRDD, e2: SparkListenerUnpersistRDD) =>
-        assert(e1.rddId === e2.rddId)
+        assert(e1.rddId == e2.rddId)
+      case (e1: SparkListenerApplicationStart, e2: SparkListenerApplicationStart) =>
+        assert(e1.appName == e2.appName)
+        assert(e1.time == e2.time)
+        assert(e1.sparkUser == e2.sparkUser)
+      case (e1: SparkListenerApplicationEnd, e2: SparkListenerApplicationEnd) =>
+        assert(e1.time == e2.time)
       case (SparkListenerShutdown, SparkListenerShutdown) =>
       case _ => fail("Events don't match in types!")
     }
@@ -553,4 +563,14 @@ class JsonProtocolSuite extends FunSuite {
       {"Event":"SparkListenerUnpersistRDD","RDD ID":12345}
     """
 
+  private val applicationStartJsonString =
+    """
+      {"Event":"SparkListenerApplicationStart","App Name":"The winner of 
all","Timestamp":42,
+      "User":"Garfield"}
+    """
+
+  private val applicationEndJsonString =
+    """
+      {"Event":"SparkListenerApplicationEnd","Timestamp":42}
+    """
 }

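For context, here is a minimal sketch of the round trip these testEvent calls exercise (assuming the JsonProtocol helpers and json4s imports the suite already uses): the event is rendered to a JSON string, parsed back, and the reconstructed event is compared field by field.

    import org.apache.spark.scheduler.SparkListenerApplicationStart
    import org.apache.spark.util.JsonProtocol
    import org.json4s.jackson.JsonMethods._

    // Serialize the new event to JSON, parse it back, and compare fields.
    val start = SparkListenerApplicationStart("The winner of all", 42L, "Garfield")
    val json = compact(render(JsonProtocol.sparkEventToJson(start)))
    val parsed = JsonProtocol.sparkEventFromJson(parse(json))
      .asInstanceOf[SparkListenerApplicationStart]
    assert(parsed.appName == start.appName && parsed.time == start.time)
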
http://git-wip-us.apache.org/repos/asf/spark/blob/9ae80bf9/docs/monitoring.md
----------------------------------------------------------------------
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 15bfb04..4c91c3a 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -12,17 +12,77 @@ displays useful information about the application. This includes:
 
 * A list of scheduler stages and tasks
 * A summary of RDD sizes and memory usage
-* Information about the running executors
 * Environmental information.
+* Information about the running executors
 
 You can access this interface by simply opening `http://<driver-node>:4040` in a web browser.
-If multiple SparkContexts are running on the same host, they will bind to succesive ports
+If multiple SparkContexts are running on the same host, they will bind to successive ports
 beginning with 4040 (4041, 4042, etc).
 
-Spark's Standalone Mode cluster manager also has its own
-[web UI](spark-standalone.html#monitoring-and-logging). 
+Note that this information is only available for the duration of the application by default.
+To view the web UI after the fact, set `spark.eventLog.enabled` to true before starting the
+application. This configures Spark to log Spark events that encode the information displayed
+in the UI to persisted storage.
 
-Note that in both of these UIs, the tables are sortable by clicking their headers,
+## Viewing After the Fact
+
+Spark's Standalone Mode cluster manager also has its own
+[web UI](spark-standalone.html#monitoring-and-logging). If an application has logged events over
+the course of its lifetime, then the Standalone master's web UI will automatically re-render the
+application's UI after the application has finished.
+
+If Spark is run on Mesos or YARN, it is still possible to reconstruct the UI of a finished
+application through Spark's history server, provided that the application's event logs exist.
+You can start the history server by executing:
+
+    ./sbin/start-history-server.sh <base-logging-directory>
+
+The base logging directory must be supplied, and should contain sub-directories that each
+represent an application's event logs. This creates a web interface at
+`http://<server-url>:18080` by default. The history server depends on the following variables:
+
+<table class="table">
+  <tr><th style="width:21%">Environment Variable</th><th>Meaning</th></tr>
+  <tr>
+    <td><code>SPARK_DAEMON_MEMORY</code></td>
+    <td>Memory to allocate to the history server (default: 512m).</td>
+  </tr>
+  <tr>
+    <td><code>SPARK_DAEMON_JAVA_OPTS</code></td>
+    <td>JVM options for the history server (default: none).</td>
+  </tr>
+</table>
+
+Further, the history server can be configured as follows:
+
+<table class="table">
+  <tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
+  <tr>
+    <td><code>spark.history.updateInterval</code></td>
+    <td>10</td>
+    <td>
+      The period, in seconds, at which information displayed by this history server is updated.
+      Each update checks for any changes made to the event logs in persisted storage.
+    </td>
+  </tr>
+  <tr>
+    <td><code>spark.history.retainedApplications</code></td>
+    <td>250</td>
+    <td>
+      The number of application UIs to retain. If this cap is exceeded, then the oldest
+      applications will be removed.
+    </td>
+  </tr>
+  <tr>
+    <td><code>spark.history.ui.port</code></td>
+    <td>18080</td>
+    <td>
+      The port to which the web interface of the history server binds.
+    </td>
+  </tr>
+</table>
+
+Note that in all of these UIs, the tables are sortable by clicking their headers,
 making it easy to identify slow tasks, data skew, etc.
 
 # Metrics

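As a concrete illustration of the event-logging setup described above, here is a minimal sketch in Scala (the app name and log directory are hypothetical; `spark.eventLog.dir` is assumed to point at the same base directory later passed to start-history-server.sh):

    import org.apache.spark.{SparkConf, SparkContext}

    // Enable event logging so the UI can be reconstructed after the app exits.
    val conf = new SparkConf()
      .setAppName("event-logging-example")            // hypothetical app name
      .set("spark.eventLog.enabled", "true")
      .set("spark.eventLog.dir", "/tmp/spark-events") // hypothetical base log dir
    val sc = new SparkContext(conf)
    // ... run jobs as usual ...
    sc.stop() // marks the application finished so its UI can be re-rendered
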
http://git-wip-us.apache.org/repos/asf/spark/blob/9ae80bf9/repl/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala
----------------------------------------------------------------------
diff --git a/repl/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala b/repl/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala
index 3ebf288..910b31d 100644
--- a/repl/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala
@@ -116,14 +116,14 @@ trait SparkILoopInit {
     }
   }
 
- def initializeSpark() {
+  def initializeSpark() {
     intp.beQuietDuring {
       command("""
          @transient val sc = org.apache.spark.repl.Main.interp.createSparkContext();
         """)
       command("import org.apache.spark.SparkContext._")
     }
-   echo("Spark context available as sc.")
+    echo("Spark context available as sc.")
   }
 
   // code to be executed only after the interpreter is initialized

http://git-wip-us.apache.org/repos/asf/spark/blob/9ae80bf9/sbin/start-history-server.sh
----------------------------------------------------------------------
diff --git a/sbin/start-history-server.sh b/sbin/start-history-server.sh
new file mode 100755
index 0000000..4a90c68
--- /dev/null
+++ b/sbin/start-history-server.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Starts the history server on the machine this script is executed on.
+#
+# Usage: start-history-server.sh <base-log-dir>
+#   Example: ./start-history-server.sh /tmp/spark-events
+#
+
+sbin=`dirname "$0"`
+sbin=`cd "$sbin"; pwd`
+
+if [ $# -lt 1 ]; then
+  echo "Usage: ./start-history-server.sh <base-log-dir>"
+  echo "Example: ./start-history-server.sh /tmp/spark-events"
+  exit 1
+fi
+
+LOG_DIR=$1
+
+"$sbin"/spark-daemon.sh start org.apache.spark.deploy.history.HistoryServer 1 
--dir "$LOG_DIR"

http://git-wip-us.apache.org/repos/asf/spark/blob/9ae80bf9/sbin/stop-history-server.sh
----------------------------------------------------------------------
diff --git a/sbin/stop-history-server.sh b/sbin/stop-history-server.sh
new file mode 100755
index 0000000..c0034ad
--- /dev/null
+++ b/sbin/stop-history-server.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Stops the history server on the machine this script is executed on.
+
+sbin=`dirname "$0"`
+sbin=`cd "$sbin"; pwd`
+
+"$sbin"/spark-daemon.sh stop org.apache.spark.deploy.history.HistoryServer 1
