svn commit: r1607545 - in /spark: images/graphx-perf-comparison.png site/images/graphx-perf-comparison.png

2014-07-03 Thread ankurdave
Author: ankurdave
Date: Thu Jul  3 07:08:46 2014
New Revision: 1607545

URL: http://svn.apache.org/r1607545
Log:
Correct the GraphX performance comparison graphic

Modified:
spark/images/graphx-perf-comparison.png
spark/site/images/graphx-perf-comparison.png

Modified: spark/images/graphx-perf-comparison.png
URL: 
http://svn.apache.org/viewvc/spark/images/graphx-perf-comparison.png?rev=1607545&r1=1607544&r2=1607545&view=diff
==
Binary files - no diff available.

Modified: spark/site/images/graphx-perf-comparison.png
URL: 
http://svn.apache.org/viewvc/spark/site/images/graphx-perf-comparison.png?rev=1607545&r1=1607544&r2=1607545&view=diff
==
Binary files - no diff available.




git commit: [SPARK-2324] SparkContext should not exit directly when spark.local.dir is a list of multiple paths and one of them has error

2014-07-03 Thread adav
Repository: spark
Updated Branches:
  refs/heads/master bc7041a42 -> 3bbeca648


[SPARK-2324] SparkContext should not exit directly when spark.local.dir is a 
list of multiple paths and one of them has error

spark.local.dir can be configured as a comma-separated list of paths, for example 
/data1/sparkenv/local,/data2/sparkenv/local. If the data2 disk on the driver node 
has an error, the application exits because DiskBlockManager exits directly in 
createLocalDirs; if the data2 disk on a worker node has an error, the executor 
exits as well.
DiskBlockManager should not exit directly in createLocalDirs when only one of the 
spark.local.dir entries has an error. Since spark.local.dir holds multiple paths, 
a problem with one of them should not bring down the whole application.
I think DiskBlockManager could ignore the bad directory in createLocalDirs.
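
For illustration, a minimal standalone sketch of that approach (hypothetical
helper name, not the DiskBlockManager code): keep only the configured roots
that can actually back a local directory, and treat only an empty result as
fatal, mirroring the new guard in the patch below.

import java.io.File

// Hypothetical helper: try each configured root and skip the ones that fail
// instead of exiting on the first failure.
def usableLocalDirs(rootDirs: String): Array[File] =
  rootDirs.split(",").flatMap { root =>
    val dir = new File(root.trim, "spark-local")
    if (dir.isDirectory || dir.mkdirs()) {
      Some(dir)                                      // usable root: keep it
    } else {
      System.err.println(s"Ignoring bad spark.local.dir entry: $root")
      None                                           // bad root: skip it
    }
  }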

Author: yantangzhai tyz0...@163.com

Closes #1274 from YanTangZhai/SPARK-2324 and squashes the following commits:

609bf48 [yantangzhai] [SPARK-2324] SparkContext should not exit directly when 
spark.local.dir is a list of multiple paths and one of them has error
df08673 [yantangzhai] [SPARK-2324] SparkContext should not exit directly when 
spark.local.dir is a list of multiple paths and one of them has error


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3bbeca64
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3bbeca64
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3bbeca64

Branch: refs/heads/master
Commit: 3bbeca648985b32bdf1eedef779cb2817eb6dfa4
Parents: bc7041a
Author: yantangzhai tyz0...@163.com
Authored: Thu Jul 3 10:14:35 2014 -0700
Committer: Aaron Davidson aa...@databricks.com
Committed: Thu Jul 3 10:14:35 2014 -0700

--
 .../org/apache/spark/storage/DiskBlockManager.scala | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3bbeca64/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala 
b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
index 2ec46d4..673fc19 100644
--- a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
@@ -44,6 +44,10 @@ private[spark] class DiskBlockManager(shuffleManager: 
ShuffleBlockManager, rootD
* directory, create multiple subdirectories that we will hash files into, 
in order to avoid
* having really large inodes at the top level. */
   private val localDirs: Array[File] = createLocalDirs()
+  if (localDirs.isEmpty) {
+    logError("Failed to create any local dir.")
+    System.exit(ExecutorExitCode.DISK_STORE_FAILED_TO_CREATE_DIR)
+  }
   private val subDirs = Array.fill(localDirs.length)(new 
Array[File](subDirsPerLocalDir))
   private var shuffleSender : ShuffleSender = null
 
@@ -116,7 +120,7 @@ private[spark] class DiskBlockManager(shuffleManager: 
ShuffleBlockManager, rootD
   private def createLocalDirs(): Array[File] = {
     logDebug(s"Creating local directories at root dirs '$rootDirs'")
     val dateFormat = new SimpleDateFormat("yyyyMMddHHmmss")
-    rootDirs.split(",").map { rootDir =>
+    rootDirs.split(",").flatMap { rootDir =>
   var foundLocalDir = false
   var localDir: File = null
   var localDirId: String = null
@@ -136,11 +140,13 @@ private[spark] class DiskBlockManager(shuffleManager: 
ShuffleBlockManager, rootD
 }
   }
   if (!foundLocalDir) {
-        logError(s"Failed $MAX_DIR_CREATION_ATTEMPTS attempts to create local dir in $rootDir")
-        System.exit(ExecutorExitCode.DISK_STORE_FAILED_TO_CREATE_DIR)
+        logError(s"Failed $MAX_DIR_CREATION_ATTEMPTS attempts to create local dir in $rootDir." +
+          " Ignoring this directory.")
+        None
+      } else {
+        logInfo(s"Created local directory at $localDir")
+        Some(localDir)
       }
-      logInfo(s"Created local directory at $localDir")
-      localDir
 }
   }
 



git commit: [SPARK] Fix NPE for ExternalAppendOnlyMap

2014-07-03 Thread adav
Repository: spark
Updated Branches:
  refs/heads/master 3bbeca648 -> c48053773


[SPARK] Fix NPE for ExternalAppendOnlyMap

It did not handle null keys very gracefully before.
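
For illustration, a minimal sketch of the null-safe hashing idea that the diff
below applies throughout the merge path (the helper name matches the patch; the
usage lines are just examples):

// Hash a (key, combiner) pair without NPE-ing on a null key: map null to a
// fixed sentinel hash instead of calling hashCode() on it.
def getKeyHashCode[K, C](kc: (K, C)): Int =
  if (kc._1 == null) 0 else kc._1.hashCode()

println(getKeyHashCode((null, 1)))    // 0, where kc._1.hashCode() would NPE
println(getKeyHashCode(("a", 1)))     // "a".hashCode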

Author: Andrew Or andrewo...@gmail.com

Closes #1288 from andrewor14/fix-external and squashes the following commits:

312b8d8 [Andrew Or] Abstract key hash code
ed5adf9 [Andrew Or] Fix NPE for ExternalAppendOnlyMap


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c4805377
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c4805377
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c4805377

Branch: refs/heads/master
Commit: c480537739f9329ebfd580f09c69778e6c976366
Parents: 3bbeca6
Author: Andrew Or andrewo...@gmail.com
Authored: Thu Jul 3 10:26:50 2014 -0700
Committer: Aaron Davidson aa...@databricks.com
Committed: Thu Jul 3 10:26:50 2014 -0700

--
 .../util/collection/ExternalAppendOnlyMap.scala | 30 ++--
 .../collection/ExternalAppendOnlyMapSuite.scala | 27 --
 2 files changed, 46 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c4805377/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
 
b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
index 288badd..292d096 100644
--- 
a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
+++ 
b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
@@ -252,7 +252,7 @@ class ExternalAppendOnlyMap[K, V, C](
   if (it.hasNext) {
 var kc = it.next()
 kcPairs += kc
-val minHash = kc._1.hashCode()
+val minHash = getKeyHashCode(kc)
        while (it.hasNext && it.head._1.hashCode() == minHash) {
   kc = it.next()
   kcPairs += kc
@@ -294,8 +294,9 @@ class ExternalAppendOnlyMap[K, V, C](
   // Select a key from the StreamBuffer that holds the lowest key hash
   val minBuffer = mergeHeap.dequeue()
   val (minPairs, minHash) = (minBuffer.pairs, minBuffer.minKeyHash)
-  var (minKey, minCombiner) = minPairs.remove(0)
-  assert(minKey.hashCode() == minHash)
+  val minPair = minPairs.remove(0)
+  var (minKey, minCombiner) = minPair
+  assert(getKeyHashCode(minPair) == minHash)
 
   // For all other streams that may have this key (i.e. have the same 
minimum key hash),
   // merge in the corresponding value (if any) from that stream
@@ -327,15 +328,16 @@ class ExternalAppendOnlyMap[K, V, C](
  * StreamBuffers are ordered by the minimum key hash found across all of 
their own pairs.
  */
 private class StreamBuffer(
-val iterator: BufferedIterator[(K, C)], val pairs: ArrayBuffer[(K, C)])
+val iterator: BufferedIterator[(K, C)],
+val pairs: ArrayBuffer[(K, C)])
   extends Comparable[StreamBuffer] {
 
   def isEmpty = pairs.length == 0
 
   // Invalid if there are no more pairs in this stream
-  def minKeyHash = {
+  def minKeyHash: Int = {
      assert(pairs.length > 0)
-pairs.head._1.hashCode()
+getKeyHashCode(pairs.head)
   }
 
   override def compareTo(other: StreamBuffer): Int = {
@@ -422,10 +424,22 @@ class ExternalAppendOnlyMap[K, V, C](
 }
 
 private[spark] object ExternalAppendOnlyMap {
+
+  /**
+   * Return the key hash code of the given (key, combiner) pair.
+   * If the key is null, return a special hash code.
+   */
+  private def getKeyHashCode[K, C](kc: (K, C)): Int = {
+if (kc._1 == null) 0 else kc._1.hashCode()
+  }
+
+  /**
+   * A comparator for (key, combiner) pairs based on their key hash codes.
+   */
   private class KCComparator[K, C] extends Comparator[(K, C)] {
 def compare(kc1: (K, C), kc2: (K, C)): Int = {
-  val hash1 = kc1._1.hashCode()
-  val hash2 = kc2._1.hashCode()
+  val hash1 = getKeyHashCode(kc1)
+  val hash2 = getKeyHashCode(kc2)
      if (hash1 < hash2) -1 else if (hash1 == hash2) 0 else 1
 }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/c4805377/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala
--
diff --git 
a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala
 
b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala
index deb7809..4288229 100644
--- 
a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala
+++ 
b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala
@@ 

git commit: [SPARK] Fix NPE for ExternalAppendOnlyMap

2014-07-03 Thread adav
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 87b74a9bf -> fdee6ee06


[SPARK] Fix NPE for ExternalAppendOnlyMap

It did not handle null keys very gracefully before.

Author: Andrew Or andrewo...@gmail.com

Closes #1288 from andrewor14/fix-external and squashes the following commits:

312b8d8 [Andrew Or] Abstract key hash code
ed5adf9 [Andrew Or] Fix NPE for ExternalAppendOnlyMap

(cherry picked from commit c480537739f9329ebfd580f09c69778e6c976366)
Signed-off-by: Aaron Davidson aa...@databricks.com


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fdee6ee0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fdee6ee0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fdee6ee0

Branch: refs/heads/branch-1.0
Commit: fdee6ee0655f04e9a0d3a66f2e8df5486a5ea032
Parents: 87b74a9
Author: Andrew Or andrewo...@gmail.com
Authored: Thu Jul 3 10:26:50 2014 -0700
Committer: Aaron Davidson aa...@databricks.com
Committed: Thu Jul 3 10:28:06 2014 -0700

--
 .../util/collection/ExternalAppendOnlyMap.scala | 30 ++--
 .../collection/ExternalAppendOnlyMapSuite.scala | 27 --
 2 files changed, 46 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/fdee6ee0/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
 
b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
index 288badd..292d096 100644
--- 
a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
+++ 
b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
@@ -252,7 +252,7 @@ class ExternalAppendOnlyMap[K, V, C](
   if (it.hasNext) {
 var kc = it.next()
 kcPairs += kc
-val minHash = kc._1.hashCode()
+val minHash = getKeyHashCode(kc)
        while (it.hasNext && it.head._1.hashCode() == minHash) {
   kc = it.next()
   kcPairs += kc
@@ -294,8 +294,9 @@ class ExternalAppendOnlyMap[K, V, C](
   // Select a key from the StreamBuffer that holds the lowest key hash
   val minBuffer = mergeHeap.dequeue()
   val (minPairs, minHash) = (minBuffer.pairs, minBuffer.minKeyHash)
-  var (minKey, minCombiner) = minPairs.remove(0)
-  assert(minKey.hashCode() == minHash)
+  val minPair = minPairs.remove(0)
+  var (minKey, minCombiner) = minPair
+  assert(getKeyHashCode(minPair) == minHash)
 
   // For all other streams that may have this key (i.e. have the same 
minimum key hash),
   // merge in the corresponding value (if any) from that stream
@@ -327,15 +328,16 @@ class ExternalAppendOnlyMap[K, V, C](
  * StreamBuffers are ordered by the minimum key hash found across all of 
their own pairs.
  */
 private class StreamBuffer(
-val iterator: BufferedIterator[(K, C)], val pairs: ArrayBuffer[(K, C)])
+val iterator: BufferedIterator[(K, C)],
+val pairs: ArrayBuffer[(K, C)])
   extends Comparable[StreamBuffer] {
 
   def isEmpty = pairs.length == 0
 
   // Invalid if there are no more pairs in this stream
-  def minKeyHash = {
+  def minKeyHash: Int = {
      assert(pairs.length > 0)
-pairs.head._1.hashCode()
+getKeyHashCode(pairs.head)
   }
 
   override def compareTo(other: StreamBuffer): Int = {
@@ -422,10 +424,22 @@ class ExternalAppendOnlyMap[K, V, C](
 }
 
 private[spark] object ExternalAppendOnlyMap {
+
+  /**
+   * Return the key hash code of the given (key, combiner) pair.
+   * If the key is null, return a special hash code.
+   */
+  private def getKeyHashCode[K, C](kc: (K, C)): Int = {
+if (kc._1 == null) 0 else kc._1.hashCode()
+  }
+
+  /**
+   * A comparator for (key, combiner) pairs based on their key hash codes.
+   */
   private class KCComparator[K, C] extends Comparator[(K, C)] {
 def compare(kc1: (K, C), kc2: (K, C)): Int = {
-  val hash1 = kc1._1.hashCode()
-  val hash2 = kc2._1.hashCode()
+  val hash1 = getKeyHashCode(kc1)
+  val hash2 = getKeyHashCode(kc2)
      if (hash1 < hash2) -1 else if (hash1 == hash2) 0 else 1
 }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/fdee6ee0/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala
--
diff --git 
a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala
 
b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala
index deb7809..4288229 100644
--- 

git commit: SPARK-1675. Make clear whether computePrincipalComponents requires centered data

2014-07-03 Thread meng
Repository: spark
Updated Branches:
  refs/heads/master c48053773 -> 2b36344f5


SPARK-1675. Make clear whether computePrincipalComponents requires centered data

Just closing out this small JIRA, resolving with a comment change.

Author: Sean Owen so...@cloudera.com

Closes #1171 from srowen/SPARK-1675 and squashes the following commits:

45ee9b7 [Sean Owen] Add simple note that data need not be centered for 
computePrincipalComponents
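
For reference, a short usage sketch of the documented behavior (it assumes an
existing SparkContext `sc`; the data is made up): rows can be passed to
RowMatrix as-is, with no column mean-centering beforehand.

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.RowMatrix

// Rows are not mean-centered; computePrincipalComponents accepts them as-is.
val rows = sc.parallelize(Seq(
  Vectors.dense(1.0, 10.0, 100.0),
  Vectors.dense(2.0, 20.0, 200.0),
  Vectors.dense(3.0, 15.0, 150.0)))

val mat = new RowMatrix(rows)
val pc  = mat.computePrincipalComponents(2)  // local n-by-k matrix of components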


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2b36344f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2b36344f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2b36344f

Branch: refs/heads/master
Commit: 2b36344f588d4e7357ce9921dc656e2389ba1dea
Parents: c480537
Author: Sean Owen so...@cloudera.com
Authored: Thu Jul 3 11:54:51 2014 -0700
Committer: Xiangrui Meng m...@databricks.com
Committed: Thu Jul 3 11:54:51 2014 -0700

--
 .../org/apache/spark/mllib/linalg/distributed/RowMatrix.scala  | 2 ++
 1 file changed, 2 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2b36344f/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
 
b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index 1a0073c..695e03b 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -347,6 +347,8 @@ class RowMatrix(
* The principal components are stored a local matrix of size n-by-k.
* Each column corresponds for one principal component,
* and the columns are in descending order of component variance.
+   * The row data do not need to be centered first; it is not necessary for
+   * the mean of each column to be 0.
*
* @param k number of top principal components.
* @return a matrix of size n-by-k, whose columns are principal components



git commit: [SPARK-2342] Evaluation helper's output type doesn't conform to input ty...

2014-07-03 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/master 2b36344f5 -> a9b52e562


[SPARK-2342] Evaluation helper's output type doesn't conform to input ty...

The function cast doesn't conform to the intention of the "Those expressions are 
supposed to be in the same data type, and also the return type." comment.
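
For context, a minimal generic Scala sketch (not the Catalyst code) of why the
mismatched return type could slip through: casts on function types are
unchecked because of erasure, so a wrong declared result type only surfaces
where the value is actually used.

val plus: (Int, Int) => Int = _ + _
val erased: Any = plus

// Erasure: this cast "succeeds" even though the declared return type is wrong.
val lying = erased.asInstanceOf[(Int, Int) => String]
// val s: String = lying(1, 2)   // would throw ClassCastException at the use site

// The honest cast keeps the operands' type, which is what the fix below does
// by returning n.JvmType instead of Int.
val honest = erased.asInstanceOf[(Int, Int) => Int]
println(honest(1, 2))            // 3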

Author: Yijie Shen henry.yijies...@gmail.com

Closes #1283 from yijieshen/master and squashes the following commits:

c7aaa4b [Yijie Shen] [SPARK-2342] Evaluation helper's output type doesn't 
conform to input type


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a9b52e56
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a9b52e56
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a9b52e56

Branch: refs/heads/master
Commit: a9b52e5623f7fc77fca96b095f9eeaef76e35d54
Parents: 2b36344
Author: Yijie Shen henry.yijies...@gmail.com
Authored: Thu Jul 3 13:22:13 2014 -0700
Committer: Michael Armbrust mich...@databricks.com
Committed: Thu Jul 3 13:22:13 2014 -0700

--
 .../org/apache/spark/sql/catalyst/expressions/Expression.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a9b52e56/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index 0411ce3..ba62dab 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -111,7 +111,7 @@ abstract class Expression extends TreeNode[Expression] {
   } else {
 e1.dataType match {
          case n: NumericType =>
-            f.asInstanceOf[(Numeric[n.JvmType], n.JvmType, n.JvmType) => Int](
+            f.asInstanceOf[(Numeric[n.JvmType], n.JvmType, n.JvmType) => n.JvmType](
              n.numeric, evalE1.asInstanceOf[n.JvmType], evalE2.asInstanceOf[n.JvmType])
          case other => sys.error(s"Type $other does not support numeric operations")
 }



git commit: [SPARK-2342] Evaluation helper's output type doesn't conform to input ty...

2014-07-03 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 fdee6ee06 -> 7766c9d26


[SPARK-2342] Evaluation helper's output type doesn't conform to input ty...

The function cast doesn't conform to the intention of the "Those expressions are 
supposed to be in the same data type, and also the return type." comment.

Author: Yijie Shen henry.yijies...@gmail.com

Closes #1283 from yijieshen/master and squashes the following commits:

c7aaa4b [Yijie Shen] [SPARK-2342] Evaluation helper's output type doesn't 
conform to input type

(cherry picked from commit a9b52e5623f7fc77fca96b095f9eeaef76e35d54)
Signed-off-by: Michael Armbrust mich...@databricks.com


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7766c9d2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7766c9d2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7766c9d2

Branch: refs/heads/branch-1.0
Commit: 7766c9d26c489498b4ad4ff20868e4555990d7eb
Parents: fdee6ee
Author: Yijie Shen henry.yijies...@gmail.com
Authored: Thu Jul 3 13:22:13 2014 -0700
Committer: Michael Armbrust mich...@databricks.com
Committed: Thu Jul 3 13:22:24 2014 -0700

--
 .../org/apache/spark/sql/catalyst/expressions/Expression.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7766c9d2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index 0411ce3..ba62dab 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -111,7 +111,7 @@ abstract class Expression extends TreeNode[Expression] {
   } else {
 e1.dataType match {
          case n: NumericType =>
-            f.asInstanceOf[(Numeric[n.JvmType], n.JvmType, n.JvmType) => Int](
+            f.asInstanceOf[(Numeric[n.JvmType], n.JvmType, n.JvmType) => n.JvmType](
              n.numeric, evalE1.asInstanceOf[n.JvmType], evalE2.asInstanceOf[n.JvmType])
          case other => sys.error(s"Type $other does not support numeric operations")
 }



git commit: [SPARK-2109] Setting SPARK_MEM for bin/pyspark does not work.

2014-07-03 Thread pwendell
Repository: spark
Updated Branches:
  refs/heads/master a9b52e562 -> 731f683b1


[SPARK-2109] Setting SPARK_MEM for bin/pyspark does not work.

Trivial fix.

Author: Prashant Sharma prashan...@imaginea.com

Closes #1050 from ScrapCodes/SPARK-2109/pyspark-script-bug and squashes the 
following commits:

77072b9 [Prashant Sharma] Changed echos to redirect to STDERR.
13f48a0 [Prashant Sharma] [SPARK-2109] Setting SPARK_MEM for bin/pyspark does 
not work.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/731f683b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/731f683b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/731f683b

Branch: refs/heads/master
Commit: 731f683b1bd8abbb83030b6bae14876658bbf098
Parents: a9b52e5
Author: Prashant Sharma prashan...@imaginea.com
Authored: Thu Jul 3 15:06:58 2014 -0700
Committer: Patrick Wendell pwend...@gmail.com
Committed: Thu Jul 3 15:06:58 2014 -0700

--
 bin/compute-classpath.sh |  8 
 bin/pyspark  |  6 +++---
 bin/run-example  | 10 +-
 bin/spark-class  | 13 ++---
 4 files changed, 18 insertions(+), 19 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/731f683b/bin/compute-classpath.sh
--
diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index 2cf4e38..e81e8c0 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -81,10 +81,10 @@ ASSEMBLY_JAR=$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)
 # Verify that versions of java used to build the jars and run Spark are compatible
 jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
 if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
-  echo "Loading Spark jar with '$JAR_CMD' failed. "
-  echo "This is likely because Spark was compiled with Java 7 and run "
-  echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark "
-  echo "or build Spark with Java 6."
+  echo "Loading Spark jar with '$JAR_CMD' failed. " 1>&2
+  echo "This is likely because Spark was compiled with Java 7 and run " 1>&2
+  echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark " 1>&2
+  echo "or build Spark with Java 6." 1>&2
   exit 1
 fi
 

http://git-wip-us.apache.org/repos/asf/spark/blob/731f683b/bin/pyspark
--
diff --git a/bin/pyspark b/bin/pyspark
index 0b5ed40..69b056f 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -26,7 +26,7 @@ export SPARK_HOME=$FWDIR
 SCALA_VERSION=2.10
 
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./bin/pyspark [options]"
+  echo "Usage: ./bin/pyspark [options]" 1>&2
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
 fi
@@ -36,8 +36,8 @@ if [ ! -f "$FWDIR/RELEASE" ]; then
   # Exit if the user hasn't compiled Spark
   ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null
   if [[ $? != 0 ]]; then
-    echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2
-    echo "You need to build Spark before running this program" >&2
+    echo "Failed to find Spark assembly in $FWDIR/assembly/target" 1>&2
+    echo "You need to build Spark before running this program" 1>&2
 exit 1
   fi
 fi

http://git-wip-us.apache.org/repos/asf/spark/blob/731f683b/bin/run-example
--
diff --git a/bin/run-example b/bin/run-example
index e7a5fe3..942706d 100755
--- a/bin/run-example
+++ b/bin/run-example
@@ -27,9 +27,9 @@ if [ -n "$1" ]; then
   EXAMPLE_CLASS="$1"
   shift
 else
-  echo "Usage: ./bin/run-example <example-class> [example-args]"
-  echo "  - set MASTER=XX to use a specific master"
-  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)"
+  echo "Usage: ./bin/run-example <example-class> [example-args]" 1>&2
+  echo "  - set MASTER=XX to use a specific master" 1>&2
+  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)" 1>&2
   exit 1
 fi
 
@@ -40,8 +40,8 @@ elif [ -e 
$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.ja
 fi
 
 if [[ -z "$SPARK_EXAMPLES_JAR" ]]; then
-  echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" >&2
-  echo "You need to build Spark before running this program" >&2
+  echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
+  echo "You need to build Spark before running this program" 1>&2
   exit 1
 fi
 

http://git-wip-us.apache.org/repos/asf/spark/blob/731f683b/bin/spark-class
--
diff --git a/bin/spark-class b/bin/spark-class
index 60d9657..04fa52c 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -33,13 +33,13 @@ export 

git commit: [SPARK-2109] Setting SPARK_MEM for bin/pyspark does not work.

2014-07-03 Thread pwendell
Repository: spark
Updated Branches:
  refs/heads/branch-1.0-jdbc cf0d14b01 -> 9f7cf5bdb


[SPARK-2109] Setting SPARK_MEM for bin/pyspark does not work.

Trivial fix.

Author: Prashant Sharma prashan...@imaginea.com

Closes #1050 from ScrapCodes/SPARK-2109/pyspark-script-bug and squashes the 
following commits:

77072b9 [Prashant Sharma] Changed echos to redirect to STDERR.
13f48a0 [Prashant Sharma] [SPARK-2109] Setting SPARK_MEM for bin/pyspark does 
not work.
(cherry picked from commit 731f683b1bd8abbb83030b6bae14876658bbf098)

Signed-off-by: Patrick Wendell pwend...@gmail.com


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9f7cf5bd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9f7cf5bd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9f7cf5bd

Branch: refs/heads/branch-1.0-jdbc
Commit: 9f7cf5bdb2a5118b9d6404078a42eeb4918a2ae5
Parents: cf0d14b
Author: Prashant Sharma prashan...@imaginea.com
Authored: Thu Jul 3 15:06:58 2014 -0700
Committer: Patrick Wendell pwend...@gmail.com
Committed: Thu Jul 3 15:07:26 2014 -0700

--
 bin/compute-classpath.sh |  8 
 bin/pyspark  |  6 +++---
 bin/run-example  | 10 +-
 bin/spark-class  | 13 ++---
 4 files changed, 18 insertions(+), 19 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9f7cf5bd/bin/compute-classpath.sh
--
diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index f94743e..d61d377 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -66,10 +66,10 @@ fi
 # Verify that versions of java used to build the jars and run Spark are 
compatible
 jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
 if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
-  echo "Loading Spark jar with '$JAR_CMD' failed. "
-  echo "This is likely because Spark was compiled with Java 7 and run "
-  echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark "
-  echo "or build Spark with Java 6."
+  echo "Loading Spark jar with '$JAR_CMD' failed. " 1>&2
+  echo "This is likely because Spark was compiled with Java 7 and run " 1>&2
+  echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark " 1>&2
+  echo "or build Spark with Java 6." 1>&2
   exit 1
 fi
 

http://git-wip-us.apache.org/repos/asf/spark/blob/9f7cf5bd/bin/pyspark
--
diff --git a/bin/pyspark b/bin/pyspark
index 114cbbc..acf3b27 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -26,7 +26,7 @@ export SPARK_HOME=$FWDIR
 SCALA_VERSION=2.10
 
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./bin/pyspark [options]"
+  echo "Usage: ./bin/pyspark [options]" 1>&2
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
 fi
@@ -36,8 +36,8 @@ if [ ! -f "$FWDIR/RELEASE" ]; then
   # Exit if the user hasn't compiled Spark
   ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null
   if [[ $? != 0 ]]; then
-    echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2
-    echo "You need to build Spark before running this program" >&2
+    echo "Failed to find Spark assembly in $FWDIR/assembly/target" 1>&2
+    echo "You need to build Spark before running this program" 1>&2
 exit 1
   fi
 fi

http://git-wip-us.apache.org/repos/asf/spark/blob/9f7cf5bd/bin/run-example
--
diff --git a/bin/run-example b/bin/run-example
index e7a5fe3..942706d 100755
--- a/bin/run-example
+++ b/bin/run-example
@@ -27,9 +27,9 @@ if [ -n "$1" ]; then
   EXAMPLE_CLASS="$1"
   shift
 else
-  echo "Usage: ./bin/run-example <example-class> [example-args]"
-  echo "  - set MASTER=XX to use a specific master"
-  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)"
+  echo "Usage: ./bin/run-example <example-class> [example-args]" 1>&2
+  echo "  - set MASTER=XX to use a specific master" 1>&2
+  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)" 1>&2
   exit 1
 fi
 
@@ -40,8 +40,8 @@ elif [ -e 
$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.ja
 fi
 
 if [[ -z "$SPARK_EXAMPLES_JAR" ]]; then
-  echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" >&2
-  echo "You need to build Spark before running this program" >&2
+  echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
+  echo "You need to build Spark before running this program" 1>&2
   exit 1
 fi
 

http://git-wip-us.apache.org/repos/asf/spark/blob/9f7cf5bd/bin/spark-class
--
diff --git a/bin/spark-class b/bin/spark-class
index e884511..14d4a03 100755
--- 

git commit: [SPARK-2109] Setting SPARK_MEM for bin/pyspark does not work.

2014-07-03 Thread pwendell
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 7766c9d26 -> 1d3616579


[SPARK-2109] Setting SPARK_MEM for bin/pyspark does not work.

Trivial fix.

Author: Prashant Sharma prashan...@imaginea.com

Closes #1050 from ScrapCodes/SPARK-2109/pyspark-script-bug and squashes the 
following commits:

77072b9 [Prashant Sharma] Changed echos to redirect to STDERR.
13f48a0 [Prashant Sharma] [SPARK-2109] Setting SPARK_MEM for bin/pyspark does 
not work.
(cherry picked from commit 731f683b1bd8abbb83030b6bae14876658bbf098)

Signed-off-by: Patrick Wendell pwend...@gmail.com


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1d361657
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1d361657
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1d361657

Branch: refs/heads/branch-1.0
Commit: 1d36165796b656bb90409e9e254e27b7e72d36d0
Parents: 7766c9d
Author: Prashant Sharma prashan...@imaginea.com
Authored: Thu Jul 3 15:06:58 2014 -0700
Committer: Patrick Wendell pwend...@gmail.com
Committed: Thu Jul 3 15:09:11 2014 -0700

--
 bin/compute-classpath.sh |  8 
 bin/pyspark  |  6 +++---
 bin/run-example  | 10 +-
 bin/spark-class  | 13 ++---
 4 files changed, 18 insertions(+), 19 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1d361657/bin/compute-classpath.sh
--
diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index 7df43a5..067850f 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -65,10 +65,10 @@ fi
 # Verify that versions of java used to build the jars and run Spark are 
compatible
 jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
 if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
-  echo "Loading Spark jar with '$JAR_CMD' failed. "
-  echo "This is likely because Spark was compiled with Java 7 and run "
-  echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark "
-  echo "or build Spark with Java 6."
+  echo "Loading Spark jar with '$JAR_CMD' failed. " 1>&2
+  echo "This is likely because Spark was compiled with Java 7 and run " 1>&2
+  echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark " 1>&2
+  echo "or build Spark with Java 6." 1>&2
   exit 1
 fi
 

http://git-wip-us.apache.org/repos/asf/spark/blob/1d361657/bin/pyspark
--
diff --git a/bin/pyspark b/bin/pyspark
index 114cbbc..acf3b27 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -26,7 +26,7 @@ export SPARK_HOME=$FWDIR
 SCALA_VERSION=2.10
 
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./bin/pyspark [options]"
+  echo "Usage: ./bin/pyspark [options]" 1>&2
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
 fi
@@ -36,8 +36,8 @@ if [ ! -f "$FWDIR/RELEASE" ]; then
   # Exit if the user hasn't compiled Spark
   ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null
   if [[ $? != 0 ]]; then
-    echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2
-    echo "You need to build Spark before running this program" >&2
+    echo "Failed to find Spark assembly in $FWDIR/assembly/target" 1>&2
+    echo "You need to build Spark before running this program" 1>&2
 exit 1
   fi
 fi

http://git-wip-us.apache.org/repos/asf/spark/blob/1d361657/bin/run-example
--
diff --git a/bin/run-example b/bin/run-example
index e7a5fe3..942706d 100755
--- a/bin/run-example
+++ b/bin/run-example
@@ -27,9 +27,9 @@ if [ -n "$1" ]; then
   EXAMPLE_CLASS="$1"
   shift
 else
-  echo "Usage: ./bin/run-example <example-class> [example-args]"
-  echo "  - set MASTER=XX to use a specific master"
-  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)"
+  echo "Usage: ./bin/run-example <example-class> [example-args]" 1>&2
+  echo "  - set MASTER=XX to use a specific master" 1>&2
+  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)" 1>&2
   exit 1
 fi
 
@@ -40,8 +40,8 @@ elif [ -e 
$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.ja
 fi
 
 if [[ -z "$SPARK_EXAMPLES_JAR" ]]; then
-  echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" >&2
-  echo "You need to build Spark before running this program" >&2
+  echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
+  echo "You need to build Spark before running this program" 1>&2
   exit 1
 fi
 

http://git-wip-us.apache.org/repos/asf/spark/blob/1d361657/bin/spark-class
--
diff --git a/bin/spark-class b/bin/spark-class
index e884511..14d4a03 100755
--- 

git commit: [HOTFIX] Synchronize on SQLContext.settings in tests.

2014-07-03 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/master 731f683b1 -> d4c30cd99


[HOTFIX] Synchronize on SQLContext.settings in tests.

Let's see if this fixes the ongoing series of test failures in a master build 
machine 
(https://amplab.cs.berkeley.edu/jenkins/job/Spark-Master-SBT-pre-YARN/SPARK_HADOOP_VERSION=1.0.4,label=centos/81/).

pwendell marmbrus

Author: Zongheng Yang zonghen...@gmail.com

Closes #1277 from concretevitamin/test-fix and squashes the following commits:

28c88bd [Zongheng Yang] Synchronize on SQLContext.settings in tests.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d4c30cd9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d4c30cd9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d4c30cd9

Branch: refs/heads/master
Commit: d4c30cd9918e18dde2a52909e36eaef6eb5996ab
Parents: 731f683
Author: Zongheng Yang zonghen...@gmail.com
Authored: Thu Jul 3 17:37:53 2014 -0700
Committer: Michael Armbrust mich...@databricks.com
Committed: Thu Jul 3 17:37:53 2014 -0700

--
 .../scala/org/apache/spark/sql/SQLConf.scala|  2 +-
 .../scala/org/apache/spark/sql/JoinSuite.scala  | 40 ++--
 .../org/apache/spark/sql/SQLConfSuite.scala | 64 +-
 .../org/apache/spark/sql/SQLQuerySuite.scala| 68 ++--
 4 files changed, 91 insertions(+), 83 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d4c30cd9/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
index 2fe7f94..3b5abab 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
@@ -50,7 +50,7 @@ trait SQLConf {
   /** ** SQLConf functionality methods  */
 
   @transient
-  private val settings = java.util.Collections.synchronizedMap(
+  protected[sql] val settings = java.util.Collections.synchronizedMap(
 new java.util.HashMap[String, String]())
 
   def set(props: Properties): Unit = {

http://git-wip-us.apache.org/repos/asf/spark/blob/d4c30cd9/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
index 3d7d5ee..054b14f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
@@ -39,25 +39,27 @@ class JoinSuite extends QueryTest {
   test(plans broadcast hash join, given hints) {
 
 def mkTest(buildSide: BuildSide, leftTable: String, rightTable: String) = {
-      TestSQLContext.set("spark.sql.join.broadcastTables",
-        s"${if (buildSide == BuildRight) rightTable else leftTable}")
-      val rdd = sql(s"SELECT * FROM $leftTable JOIN $rightTable ON key = a")
-  // Using `sparkPlan` because for relevant patterns in HashJoin to be
-  // matched, other strategies need to be applied.
-  val physical = rdd.queryExecution.sparkPlan
-      val bhj = physical.collect { case j: BroadcastHashJoin if j.buildSide == buildSide => j }
-
-      assert(bhj.size === 1, "planner does not pick up hint to generate broadcast hash join")
-  checkAnswer(
-rdd,
-Seq(
-  (1, 1, 1, 1),
-  (1, 1, 1, 2),
-  (2, 2, 2, 1),
-  (2, 2, 2, 2),
-  (3, 3, 3, 1),
-  (3, 3, 3, 2)
-))
+      TestSQLContext.settings.synchronized {
+        TestSQLContext.set("spark.sql.join.broadcastTables",
+          s"${if (buildSide == BuildRight) rightTable else leftTable}")
+        val rdd = sql( s"SELECT * FROM $leftTable JOIN $rightTable ON key = a")
+        // Using `sparkPlan` because for relevant patterns in HashJoin to be
+        // matched, other strategies need to be applied.
+        val physical = rdd.queryExecution.sparkPlan
+        val bhj = physical.collect { case j: BroadcastHashJoin if j.buildSide == buildSide => j}
+
+        assert(bhj.size === 1, "planner does not pick up hint to generate broadcast hash join")
+checkAnswer(
+  rdd,
+  Seq(
+(1, 1, 1, 1),
+(1, 1, 1, 2),
+(2, 2, 2, 1),
+(2, 2, 2, 2),
+(3, 3, 3, 1),
+(3, 3, 3, 2)
+  ))
+  }
 }
 
     mkTest(BuildRight, "testData", "testData2")

http://git-wip-us.apache.org/repos/asf/spark/blob/d4c30cd9/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala
--
diff --git 

git commit: Streaming programming guide typos

2014-07-03 Thread tdas
Repository: spark
Updated Branches:
  refs/heads/master d4c30cd99 -> fdc4c112e


Streaming programming guide typos

Fix a bad Java code sample and a broken link in the streaming programming guide.

Author: Clément MATHIEU clem...@unportant.info

Closes #1286 from cykl/streaming-programming-guide-typos and squashes the 
following commits:

b0908cb [Clément MATHIEU] Fix broken URL
9d3c535 [Clément MATHIEU] Spark streaming requires at least two working 
threads (scala version was OK)
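
The same point as a hedged Scala sketch (not taken from the guide): with a
local master, the receiver occupies one thread, so at least two are needed for
the batches to actually be processed.

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

// "local[2]": one thread for the socket receiver, at least one for processing.
// With plain "local" the receiver starves the batch computation.
val conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount")
val ssc  = new StreamingContext(conf, Seconds(1))

val words = ssc.socketTextStream("localhost", 9999).flatMap(_.split(" "))
words.map((_, 1)).reduceByKey(_ + _).print()

ssc.start()
ssc.awaitTermination()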


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fdc4c112
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fdc4c112
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fdc4c112

Branch: refs/heads/master
Commit: fdc4c112e7c2ac585d108d03209a642aa8bab7c8
Parents: d4c30cd
Author: Clément MATHIEU clem...@unportant.info
Authored: Thu Jul 3 18:31:18 2014 -0700
Committer: Tathagata Das tathagata.das1...@gmail.com
Committed: Thu Jul 3 18:31:18 2014 -0700

--
 docs/streaming-programming-guide.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/fdc4c112/docs/streaming-programming-guide.md
--
diff --git a/docs/streaming-programming-guide.md 
b/docs/streaming-programming-guide.md
index ce8e58d..90a0eef 100644
--- a/docs/streaming-programming-guide.md
+++ b/docs/streaming-programming-guide.md
@@ -148,7 +148,7 @@ import org.apache.spark.streaming.*;
 import org.apache.spark.streaming.api.java.*;
 import scala.Tuple2;
 // Create a StreamingContext with a local master
-JavaStreamingContext jssc = new JavaStreamingContext("local", "JavaNetworkWordCount", new Duration(1000))
+JavaStreamingContext jssc = new JavaStreamingContext("local[2]", "JavaNetworkWordCount", new Duration(1000))
 {% endhighlight %}
 
 Using this context, we then create a new DStream
@@ -216,7 +216,7 @@ jssc.awaitTermination();   // Wait for the computation to 
terminate
 {% endhighlight %}
 
 The complete code can be found in the Spark Streaming example
-[JavaNetworkWordCount]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/java/index.html?org/apache/spark/examples/streaming/JavaNetworkWordCount.java).
+[JavaNetworkWordCount]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/java/org/apache/spark/examples/streaming/JavaNetworkWordCount.java).
 br
 
 /div



git commit: [SPARK-1097] Workaround Hadoop conf ConcurrentModification issue

2014-07-03 Thread adav
Repository: spark
Updated Branches:
  refs/heads/master fdc4c112e -> 5fa0a0576


[SPARK-1097] Workaround Hadoop conf ConcurrentModification issue

Workaround Hadoop conf ConcurrentModification issue
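
A minimal sketch of the workaround pattern (variable names are hypothetical):
serialize JobConf construction by synchronizing on the shared Configuration it
copies from.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapred.JobConf

// Copying a Configuration iterates over its entries, so a concurrent writer
// can trigger ConcurrentModificationException (HADOOP-10456 / SPARK-1097).
// Locking on the shared conf while cloning avoids the race.
def cloneJobConf(sharedConf: Configuration): JobConf =
  sharedConf.synchronized {
    new JobConf(sharedConf)
  }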

Author: Raymond Liu raymond@intel.com

Closes #1273 from colorant/hadoopRDD and squashes the following commits:

994e98b [Raymond Liu] Address comments
e2cda3d [Raymond Liu] Workaround Hadoop conf ConcurrentModification issue


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5fa0a057
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5fa0a057
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5fa0a057

Branch: refs/heads/master
Commit: 5fa0a05763ab1d527efe20e3b10539ac5ffc36de
Parents: fdc4c11
Author: Raymond Liu raymond@intel.com
Authored: Thu Jul 3 19:24:22 2014 -0700
Committer: Aaron Davidson aa...@databricks.com
Committed: Thu Jul 3 19:24:22 2014 -0700

--
 core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5fa0a057/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
--
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala 
b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
index 98dcbf4..0410285 100644
--- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -141,8 +141,8 @@ class HadoopRDD[K, V](
   // local process. The local cache is accessed through 
HadoopRDD.putCachedMetadata().
   // The caching helps minimize GC, since a JobConf can contain ~10KB of 
temporary objects.
   // synchronize to prevent ConcurrentModificationException (Spark-1097, 
Hadoop-10456)
-      broadcastedConf.synchronized {
-        val newJobConf = new JobConf(broadcastedConf.value.value)
+      conf.synchronized {
+        val newJobConf = new JobConf(conf)
         initLocalJobConfFuncOpt.map(f => f(newJobConf))
 HadoopRDD.putCachedMetadata(jobConfCacheKey, newJobConf)
 newJobConf



git commit: [SPARK-2350] Don't NPE while launching drivers

2014-07-03 Thread pwendell
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 d2f253467 -> 27a2afed6


[SPARK-2350] Don't NPE while launching drivers

Prior to this change, we could throw a NPE if we launch a driver while another 
one is waiting, because removing from an iterator while iterating over it is 
not safe.
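
A minimal standalone sketch of the pitfall and the fix (buffer contents are
made up): removing from a mutable collection while iterating it directly is
unsafe, so iterate over a snapshot instead, as the one-line change below does.

import scala.collection.mutable.ArrayBuffer

val waitingDrivers = ArrayBuffer("driver-1", "driver-2", "driver-3")

// Safe: traverse an immutable copy, mutate the original.
for (driver <- List(waitingDrivers: _*)) {
  if (driver == "driver-2") {
    waitingDrivers -= driver   // removes from the buffer, not from the copy
  }
}

println(waitingDrivers)        // ArrayBuffer(driver-1, driver-3)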

Author: Aaron Davidson aa...@databricks.com

Closes #1289 from aarondav/master-fail and squashes the following commits:

1cf1cf4 [Aaron Davidson] SPARK-2350: Don't NPE while launching drivers
(cherry picked from commit 586feb5c9528042420f678f78bacb6c254a5eaf8)

Signed-off-by: Patrick Wendell pwend...@gmail.com


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/27a2afed
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/27a2afed
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/27a2afed

Branch: refs/heads/branch-1.0
Commit: 27a2afed60035474a100436798c8048506addb54
Parents: d2f2534
Author: Aaron Davidson aa...@databricks.com
Authored: Thu Jul 3 22:31:41 2014 -0700
Committer: Patrick Wendell pwend...@gmail.com
Committed: Thu Jul 3 22:32:05 2014 -0700

--
 core/src/main/scala/org/apache/spark/deploy/master/Master.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/27a2afed/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala 
b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
index 33ffcbd..4ed4827 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
@@ -481,7 +481,7 @@ private[spark] class Master(
 // First schedule drivers, they take strict precedence over applications
 val shuffledWorkers = Random.shuffle(workers) // Randomization helps 
balance drivers
     for (worker <- shuffledWorkers if worker.state == WorkerState.ALIVE) {
-      for (driver <- waitingDrivers) {
+      for (driver <- List(waitingDrivers: _*)) { // iterate over a copy of waitingDrivers
         if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
   launchDriver(worker, driver)
   waitingDrivers -= driver



git commit: [SPARK-2350] Don't NPE while launching drivers

2014-07-03 Thread pwendell
Repository: spark
Updated Branches:
  refs/heads/master 5fa0a0576 -> 586feb5c9


[SPARK-2350] Don't NPE while launching drivers

Prior to this change, we could throw a NPE if we launch a driver while another 
one is waiting, because removing from an iterator while iterating over it is 
not safe.

Author: Aaron Davidson aa...@databricks.com

Closes #1289 from aarondav/master-fail and squashes the following commits:

1cf1cf4 [Aaron Davidson] SPARK-2350: Don't NPE while launching drivers


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/586feb5c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/586feb5c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/586feb5c

Branch: refs/heads/master
Commit: 586feb5c9528042420f678f78bacb6c254a5eaf8
Parents: 5fa0a05
Author: Aaron Davidson aa...@databricks.com
Authored: Thu Jul 3 22:31:41 2014 -0700
Committer: Patrick Wendell pwend...@gmail.com
Committed: Thu Jul 3 22:31:41 2014 -0700

--
 core/src/main/scala/org/apache/spark/deploy/master/Master.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/586feb5c/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala 
b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
index 11545b8..a304102 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
@@ -481,7 +481,7 @@ private[spark] class Master(
 // First schedule drivers, they take strict precedence over applications
 val shuffledWorkers = Random.shuffle(workers) // Randomization helps 
balance drivers
     for (worker <- shuffledWorkers if worker.state == WorkerState.ALIVE) {
-      for (driver <- waitingDrivers) {
+      for (driver <- List(waitingDrivers: _*)) { // iterate over a copy of waitingDrivers
         if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
   launchDriver(worker, driver)
   waitingDrivers -= driver



git commit: [SPARK-2307][Reprise] Correctly report RDD blocks on SparkUI

2014-07-03 Thread pwendell
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 27a2afed6 -> cf1d46e46


[SPARK-2307][Reprise] Correctly report RDD blocks on SparkUI

**Problem.** The existing code in `ExecutorPage.scala` requires a linear scan 
through all the blocks to filter out the uncached ones. Every refresh could be 
expensive if there are many blocks and many executors.

**Solution.** The proper semantics should be the following: 
`StorageStatusListener` should contain only block statuses that are cached. 
This means as soon as a block is unpersisted by any means, its status should be 
removed. This is reflected in the changes made in `StorageStatusListener.scala`.

Further, the `StorageTab` must stop relying on the `StorageStatusListener` 
changing a dropped block's status to `StorageLevel.NONE` (which no longer 
happens). This is reflected in the changes made in `StorageTab.scala` and 
`StorageUtils.scala`.
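
A minimal sketch of the intended semantics (types simplified to strings; not
the real BlockId/BlockStatus classes): the per-executor block map only ever
holds cached blocks, so a drop to StorageLevel.NONE removes the entry instead
of storing a NONE status.

import scala.collection.mutable

// blockId -> storage level, keeping only blocks that are actually cached.
val blocks = mutable.Map[String, String]()

def updateBlock(blockId: String, level: String): Unit =
  if (level == "NONE") blocks.remove(blockId)   // unpersisted: forget the block
  else blocks(blockId) = level                  // cached: record or refresh it

updateBlock("rdd_0_0", "MEMORY_ONLY")
updateBlock("rdd_0_0", "NONE")
println(blocks.size)   // 0 -- the UI can count blocks without a linear filter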

--

If you have been following this chain of PRs like pwendell, you will quickly 
notice that this reverts the changes in #1249, which reverts the changes in 
#1080. In other words, we are adding back the changes from #1080, and fixing 
SPARK-2307 on top of those changes. Please ask questions if you are confused.

Author: Andrew Or andrewo...@gmail.com

Closes #1255 from andrewor14/storage-ui-fix-reprise and squashes the following 
commits:

45416fa [Andrew Or] Merge branch 'master' of github.com:apache/spark into 
storage-ui-fix-reprise
a82ea25 [Andrew Or] Add tests for StorageStatusListener
8773b01 [Andrew Or] Update comment / minor changes
3afde3f [Andrew Or] Correctly report the number of blocks on SparkUI
(cherry picked from commit 3894a49be9b532cc026d908a0f49bca850504498)

Signed-off-by: Patrick Wendell pwend...@gmail.com


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cf1d46e4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cf1d46e4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cf1d46e4

Branch: refs/heads/branch-1.0
Commit: cf1d46e46518c818d20f07cdaabbd8069d877ca8
Parents: 27a2afe
Author: Andrew Or andrewo...@gmail.com
Authored: Thu Jul 3 22:48:23 2014 -0700
Committer: Patrick Wendell pwend...@gmail.com
Committed: Thu Jul 3 22:48:33 2014 -0700

--
 .../spark/storage/StorageStatusListener.scala   |  17 ++-
 .../org/apache/spark/storage/StorageUtils.scala |  15 +-
 .../apache/spark/ui/exec/ExecutorsPage.scala|   4 +-
 .../org/apache/spark/ui/exec/ExecutorsTab.scala |   4 +-
 .../apache/spark/ui/storage/StorageTab.scala|  15 +-
 .../storage/StorageStatusListenerSuite.scala| 152 +++
 6 files changed, 184 insertions(+), 23 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/cf1d46e4/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala 
b/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala
index a6e6627..41c960c 100644
--- a/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala
+++ b/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala
@@ -28,26 +28,31 @@ import org.apache.spark.scheduler._
  */
 @DeveloperApi
 class StorageStatusListener extends SparkListener {
-  private val executorIdToStorageStatus = mutable.Map[String, StorageStatus]()
+  // This maintains only blocks that are cached (i.e. storage level is not 
StorageLevel.NONE)
+  private[storage] val executorIdToStorageStatus = mutable.Map[String, 
StorageStatus]()
 
   def storageStatusList = executorIdToStorageStatus.values.toSeq
 
   /** Update storage status list to reflect updated block statuses */
-  def updateStorageStatus(execId: String, updatedBlocks: Seq[(BlockId, 
BlockStatus)]) {
-val filteredStatus = storageStatusList.find(_.blockManagerId.executorId == 
execId)
+  private def updateStorageStatus(execId: String, updatedBlocks: Seq[(BlockId, 
BlockStatus)]) {
+val filteredStatus = executorIdToStorageStatus.get(execId)
     filteredStatus.foreach { storageStatus =>
       updatedBlocks.foreach { case (blockId, updatedStatus) =>
-        storageStatus.blocks(blockId) = updatedStatus
+        if (updatedStatus.storageLevel == StorageLevel.NONE) {
+          storageStatus.blocks.remove(blockId)
+        } else {
+          storageStatus.blocks(blockId) = updatedStatus
+        }
   }
 }
   }
 
   /** Update storage status list to reflect the removal of an RDD from the 
cache */
-  def updateStorageStatus(unpersistedRDDId: Int) {
+  private def updateStorageStatus(unpersistedRDDId: Int) {
     storageStatusList.foreach { storageStatus =>
   val unpersistedBlocksIds = storageStatus.rddBlocks.keys.filter(_.rddId 
==