svn commit: r1607545 - in /spark: images/graphx-perf-comparison.png site/images/graphx-perf-comparison.png
Author: ankurdave Date: Thu Jul 3 07:08:46 2014 New Revision: 1607545 URL: http://svn.apache.org/r1607545 Log: Correct the GraphX performance comparison graphic Modified: spark/images/graphx-perf-comparison.png spark/site/images/graphx-perf-comparison.png Modified: spark/images/graphx-perf-comparison.png URL: http://svn.apache.org/viewvc/spark/images/graphx-perf-comparison.png?rev=1607545&r1=1607544&r2=1607545&view=diff == Binary files - no diff available. Modified: spark/site/images/graphx-perf-comparison.png URL: http://svn.apache.org/viewvc/spark/site/images/graphx-perf-comparison.png?rev=1607545&r1=1607544&r2=1607545&view=diff == Binary files - no diff available.
git commit: [SPARK-2324] SparkContext should not exit directly when spark.local.dir is a list of multiple paths and one of them has error
Repository: spark Updated Branches: refs/heads/master bc7041a42 -> 3bbeca648 [SPARK-2324] SparkContext should not exit directly when spark.local.dir is a list of multiple paths and one of them has error The spark.local.dir is configured as a list of multiple paths, for example /data1/sparkenv/local,/data2/sparkenv/local. If the disk data2 of the driver node has an error, the application will exit since DiskBlockManager exits directly at createLocalDirs. If the disk data2 of the worker node has an error, the executor will exit as well. DiskBlockManager should not exit directly at createLocalDirs if one of the spark.local.dir paths has an error. Since spark.local.dir has multiple paths, a problem with one of them should not affect the whole application. I think DiskBlockManager could ignore the bad directory at createLocalDirs. Author: yantangzhai tyz0...@163.com Closes #1274 from YanTangZhai/SPARK-2324 and squashes the following commits: 609bf48 [yantangzhai] [SPARK-2324] SparkContext should not exit directly when spark.local.dir is a list of multiple paths and one of them has error df08673 [yantangzhai] [SPARK-2324] SparkContext should not exit directly when spark.local.dir is a list of multiple paths and one of them has error Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3bbeca64 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3bbeca64 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3bbeca64 Branch: refs/heads/master Commit: 3bbeca648985b32bdf1eedef779cb2817eb6dfa4 Parents: bc7041a Author: yantangzhai tyz0...@163.com Authored: Thu Jul 3 10:14:35 2014 -0700 Committer: Aaron Davidson aa...@databricks.com Committed: Thu Jul 3 10:14:35 2014 -0700 -- .../org/apache/spark/storage/DiskBlockManager.scala | 16 +++- 1 file changed, 11 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3bbeca64/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala index 2ec46d4..673fc19 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala @@ -44,6 +44,10 @@ private[spark] class DiskBlockManager(shuffleManager: ShuffleBlockManager, rootD * directory, create multiple subdirectories that we will hash files into, in order to avoid * having really large inodes at the top level. */ private val localDirs: Array[File] = createLocalDirs() + if (localDirs.isEmpty) { +logError("Failed to create any local dir.")
+System.exit(ExecutorExitCode.DISK_STORE_FAILED_TO_CREATE_DIR) + } private val subDirs = Array.fill(localDirs.length)(new Array[File](subDirsPerLocalDir)) private var shuffleSender : ShuffleSender = null @@ -116,7 +120,7 @@ private[spark] class DiskBlockManager(shuffleManager: ShuffleBlockManager, rootD private def createLocalDirs(): Array[File] = { logDebug(s"Creating local directories at root dirs '$rootDirs'") val dateFormat = new SimpleDateFormat("yyyyMMddHHmmss") -rootDirs.split(",").map { rootDir => +rootDirs.split(",").flatMap { rootDir => var foundLocalDir = false var localDir: File = null var localDirId: String = null @@ -136,11 +140,13 @@ private[spark] class DiskBlockManager(shuffleManager: ShuffleBlockManager, rootD } } if (!foundLocalDir) { -logError(s"Failed $MAX_DIR_CREATION_ATTEMPTS attempts to create local dir in $rootDir") -System.exit(ExecutorExitCode.DISK_STORE_FAILED_TO_CREATE_DIR) +logError(s"Failed $MAX_DIR_CREATION_ATTEMPTS attempts to create local dir in $rootDir." + + " Ignoring this directory.") +None + } else { +logInfo(s"Created local directory at $localDir") +Some(localDir) } - logInfo(s"Created local directory at $localDir") - localDir } }
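A minimal, self-contained Scala sketch of the pattern this patch applies: map each configured root directory to an Option and flatten, so a bad disk is skipped instead of killing the process, and only give up when every root has failed. The directory list, helper names, and error handling below are illustrative, not the actual DiskBlockManager code.

import java.io.File
import java.util.UUID

object LocalDirsSketch {
  // Try to create a working subdirectory under each configured root dir; roots
  // that fail are dropped (None) rather than triggering an immediate exit.
  def createLocalDirs(rootDirs: String): Array[File] =
    rootDirs.split(",").flatMap { rootDir =>
      try {
        val dir = new File(rootDir, "spark-local-" + UUID.randomUUID().toString.take(8))
        if (dir.mkdirs() || dir.isDirectory) {
          println(s"Created local directory at $dir")
          Some(dir)
        } else {
          println(s"Failed to create local dir in $rootDir. Ignoring this directory.")
          None
        }
      } catch {
        case _: SecurityException =>
          println(s"Failed to create local dir in $rootDir. Ignoring this directory.")
          None
      }
    }

  def main(args: Array[String]): Unit = {
    val dirs = createLocalDirs("/tmp/sparkenv/local,/nonexistent-disk/sparkenv/local")
    // Only abort when no root directory is usable at all.
    if (dirs.isEmpty) sys.error("Failed to create any local dir.")
  }
}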
git commit: [SPARK] Fix NPE for ExternalAppendOnlyMap
Repository: spark Updated Branches: refs/heads/master 3bbeca648 -> c48053773 [SPARK] Fix NPE for ExternalAppendOnlyMap It did not handle null keys very gracefully before. Author: Andrew Or andrewo...@gmail.com Closes #1288 from andrewor14/fix-external and squashes the following commits: 312b8d8 [Andrew Or] Abstract key hash code ed5adf9 [Andrew Or] Fix NPE for ExternalAppendOnlyMap Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c4805377 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c4805377 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c4805377 Branch: refs/heads/master Commit: c480537739f9329ebfd580f09c69778e6c976366 Parents: 3bbeca6 Author: Andrew Or andrewo...@gmail.com Authored: Thu Jul 3 10:26:50 2014 -0700 Committer: Aaron Davidson aa...@databricks.com Committed: Thu Jul 3 10:26:50 2014 -0700 -- .../util/collection/ExternalAppendOnlyMap.scala | 30 ++-- .../collection/ExternalAppendOnlyMapSuite.scala | 27 -- 2 files changed, 46 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c4805377/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala index 288badd..292d096 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala @@ -252,7 +252,7 @@ class ExternalAppendOnlyMap[K, V, C]( if (it.hasNext) { var kc = it.next() kcPairs += kc -val minHash = kc._1.hashCode() +val minHash = getKeyHashCode(kc) while (it.hasNext && it.head._1.hashCode() == minHash) { kc = it.next() kcPairs += kc @@ -294,8 +294,9 @@ class ExternalAppendOnlyMap[K, V, C]( // Select a key from the StreamBuffer that holds the lowest key hash val minBuffer = mergeHeap.dequeue() val (minPairs, minHash) = (minBuffer.pairs, minBuffer.minKeyHash) - var (minKey, minCombiner) = minPairs.remove(0) - assert(minKey.hashCode() == minHash) + val minPair = minPairs.remove(0) + var (minKey, minCombiner) = minPair + assert(getKeyHashCode(minPair) == minHash) // For all other streams that may have this key (i.e. have the same minimum key hash), // merge in the corresponding value (if any) from that stream @@ -327,15 +328,16 @@ class ExternalAppendOnlyMap[K, V, C]( * StreamBuffers are ordered by the minimum key hash found across all of their own pairs. */ private class StreamBuffer( -val iterator: BufferedIterator[(K, C)], val pairs: ArrayBuffer[(K, C)]) +val iterator: BufferedIterator[(K, C)], +val pairs: ArrayBuffer[(K, C)]) extends Comparable[StreamBuffer] { def isEmpty = pairs.length == 0 // Invalid if there are no more pairs in this stream - def minKeyHash = { + def minKeyHash: Int = { assert(pairs.length > 0) -pairs.head._1.hashCode() +getKeyHashCode(pairs.head) } override def compareTo(other: StreamBuffer): Int = { @@ -422,10 +424,22 @@ class ExternalAppendOnlyMap[K, V, C]( } private[spark] object ExternalAppendOnlyMap { + + /** + * Return the key hash code of the given (key, combiner) pair. + * If the key is null, return a special hash code.
+ */ private class KCComparator[K, C] extends Comparator[(K, C)] { def compare(kc1: (K, C), kc2: (K, C)): Int = { - val hash1 = kc1._1.hashCode() - val hash2 = kc2._1.hashCode() + val hash1 = getKeyHashCode(kc1) + val hash2 = getKeyHashCode(kc2) if (hash1 < hash2) -1 else if (hash1 == hash2) 0 else 1 } } http://git-wip-us.apache.org/repos/asf/spark/blob/c4805377/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala index deb7809..4288229 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala @@
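The core of the fix is routing every key hash computation through one helper that special-cases null, so spilled streams containing null keys merge without a NullPointerException. A standalone sketch of that idea follows; the helper name mirrors the patch, while the surrounding object and sample data are only for illustration.

object NullSafeHashSketch {
  // Null keys get a fixed, valid hash code instead of throwing an NPE.
  private def getKeyHashCode[K, C](kc: (K, C)): Int =
    if (kc._1 == null) 0 else kc._1.hashCode()

  def main(args: Array[String]): Unit = {
    val pairs: Seq[(String, Int)] = Seq(("a", 1), (null, 2), ("b", 3))
    // Calling hashCode() directly on the key would throw for the null entry:
    //   pairs.map(_._1.hashCode())   // NullPointerException
    // The helper handles it:
    println(pairs.map(p => getKeyHashCode(p)))   // List(97, 0, 98)
  }
}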
git commit: [SPARK] Fix NPE for ExternalAppendOnlyMap
Repository: spark Updated Branches: refs/heads/branch-1.0 87b74a9bf - fdee6ee06 [SPARK] Fix NPE for ExternalAppendOnlyMap It did not handle null keys very gracefully before. Author: Andrew Or andrewo...@gmail.com Closes #1288 from andrewor14/fix-external and squashes the following commits: 312b8d8 [Andrew Or] Abstract key hash code ed5adf9 [Andrew Or] Fix NPE for ExternalAppendOnlyMap (cherry picked from commit c480537739f9329ebfd580f09c69778e6c976366) Signed-off-by: Aaron Davidson aa...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fdee6ee0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fdee6ee0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fdee6ee0 Branch: refs/heads/branch-1.0 Commit: fdee6ee0655f04e9a0d3a66f2e8df5486a5ea032 Parents: 87b74a9 Author: Andrew Or andrewo...@gmail.com Authored: Thu Jul 3 10:26:50 2014 -0700 Committer: Aaron Davidson aa...@databricks.com Committed: Thu Jul 3 10:28:06 2014 -0700 -- .../util/collection/ExternalAppendOnlyMap.scala | 30 ++-- .../collection/ExternalAppendOnlyMapSuite.scala | 27 -- 2 files changed, 46 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fdee6ee0/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala index 288badd..292d096 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala @@ -252,7 +252,7 @@ class ExternalAppendOnlyMap[K, V, C]( if (it.hasNext) { var kc = it.next() kcPairs += kc -val minHash = kc._1.hashCode() +val minHash = getKeyHashCode(kc) while (it.hasNext it.head._1.hashCode() == minHash) { kc = it.next() kcPairs += kc @@ -294,8 +294,9 @@ class ExternalAppendOnlyMap[K, V, C]( // Select a key from the StreamBuffer that holds the lowest key hash val minBuffer = mergeHeap.dequeue() val (minPairs, minHash) = (minBuffer.pairs, minBuffer.minKeyHash) - var (minKey, minCombiner) = minPairs.remove(0) - assert(minKey.hashCode() == minHash) + val minPair = minPairs.remove(0) + var (minKey, minCombiner) = minPair + assert(getKeyHashCode(minPair) == minHash) // For all other streams that may have this key (i.e. have the same minimum key hash), // merge in the corresponding value (if any) from that stream @@ -327,15 +328,16 @@ class ExternalAppendOnlyMap[K, V, C]( * StreamBuffers are ordered by the minimum key hash found across all of their own pairs. */ private class StreamBuffer( -val iterator: BufferedIterator[(K, C)], val pairs: ArrayBuffer[(K, C)]) +val iterator: BufferedIterator[(K, C)], +val pairs: ArrayBuffer[(K, C)]) extends Comparable[StreamBuffer] { def isEmpty = pairs.length == 0 // Invalid if there are no more pairs in this stream - def minKeyHash = { + def minKeyHash: Int = { assert(pairs.length 0) -pairs.head._1.hashCode() +getKeyHashCode(pairs.head) } override def compareTo(other: StreamBuffer): Int = { @@ -422,10 +424,22 @@ class ExternalAppendOnlyMap[K, V, C]( } private[spark] object ExternalAppendOnlyMap { + + /** + * Return the key hash code of the given (key, combiner) pair. + * If the key is null, return a special hash code. 
+ */ + private def getKeyHashCode[K, C](kc: (K, C)): Int = { +if (kc._1 == null) 0 else kc._1.hashCode() + } + + /** + * A comparator for (key, combiner) pairs based on their key hash codes. + */ private class KCComparator[K, C] extends Comparator[(K, C)] { def compare(kc1: (K, C), kc2: (K, C)): Int = { - val hash1 = kc1._1.hashCode() - val hash2 = kc2._1.hashCode() + val hash1 = getKeyHashCode(kc1) + val hash2 = getKeyHashCode(kc2) if (hash1 hash2) -1 else if (hash1 == hash2) 0 else 1 } } http://git-wip-us.apache.org/repos/asf/spark/blob/fdee6ee0/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala index deb7809..4288229 100644 ---
git commit: SPARK-1675. Make clear whether computePrincipalComponents requires centered data
Repository: spark Updated Branches: refs/heads/master c48053773 - 2b36344f5 SPARK-1675. Make clear whether computePrincipalComponents requires centered data Just closing out this small JIRA, resolving with a comment change. Author: Sean Owen so...@cloudera.com Closes #1171 from srowen/SPARK-1675 and squashes the following commits: 45ee9b7 [Sean Owen] Add simple note that data need not be centered for computePrincipalComponents Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2b36344f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2b36344f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2b36344f Branch: refs/heads/master Commit: 2b36344f588d4e7357ce9921dc656e2389ba1dea Parents: c480537 Author: Sean Owen so...@cloudera.com Authored: Thu Jul 3 11:54:51 2014 -0700 Committer: Xiangrui Meng m...@databricks.com Committed: Thu Jul 3 11:54:51 2014 -0700 -- .../org/apache/spark/mllib/linalg/distributed/RowMatrix.scala | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2b36344f/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 1a0073c..695e03b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -347,6 +347,8 @@ class RowMatrix( * The principal components are stored a local matrix of size n-by-k. * Each column corresponds for one principal component, * and the columns are in descending order of component variance. + * The row data do not need to be centered first; it is not necessary for + * the mean of each column to be 0. * * @param k number of top principal components. * @return a matrix of size n-by-k, whose columns are principal components
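For context, computePrincipalComponents is called on a RowMatrix built from an RDD of vectors, and the clarified scaladoc simply says callers may pass raw, uncentered rows. A hedged usage sketch against the MLlib 1.x API; the SparkContext setup and the sample vectors are made up for illustration.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.RowMatrix

object PcaSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("pca-sketch"))

    // Raw rows; per the clarified comment they do NOT need to be mean-centered first.
    val rows = sc.parallelize(Seq(
      Vectors.dense(1.0, 2.0, 3.0),
      Vectors.dense(4.0, 5.0, 6.0),
      Vectors.dense(7.0, 8.0, 10.0)))

    val mat = new RowMatrix(rows)
    // Top k = 2 principal components, returned as a local n-by-k matrix whose
    // columns are in descending order of component variance.
    val pc = mat.computePrincipalComponents(2)
    println(pc)

    sc.stop()
  }
}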
git commit: [SPARK-2342] Evaluation helper's output type doesn't conform to input ty...
Repository: spark Updated Branches: refs/heads/master 2b36344f5 -> a9b52e562 [SPARK-2342] Evaluation helper's output type doesn't conform to input ty... The function cast doesn't conform to the intention of the "Those expressions are supposed to be in the same data type, and also the return type." comment Author: Yijie Shen henry.yijies...@gmail.com Closes #1283 from yijieshen/master and squashes the following commits: c7aaa4b [Yijie Shen] [SPARK-2342] Evaluation helper's output type doesn't conform to input type Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a9b52e56 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a9b52e56 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a9b52e56 Branch: refs/heads/master Commit: a9b52e5623f7fc77fca96b095f9eeaef76e35d54 Parents: 2b36344 Author: Yijie Shen henry.yijies...@gmail.com Authored: Thu Jul 3 13:22:13 2014 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Thu Jul 3 13:22:13 2014 -0700 -- .../org/apache/spark/sql/catalyst/expressions/Expression.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a9b52e56/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 0411ce3..ba62dab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -111,7 +111,7 @@ abstract class Expression extends TreeNode[Expression] { } else { e1.dataType match { case n: NumericType => -f.asInstanceOf[(Numeric[n.JvmType], n.JvmType, n.JvmType) => Int]( +f.asInstanceOf[(Numeric[n.JvmType], n.JvmType, n.JvmType) => n.JvmType]( n.numeric, evalE1.asInstanceOf[n.JvmType], evalE2.asInstanceOf[n.JvmType]) case other => sys.error(s"Type $other does not support numeric operations") }
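The one-token change matters because the helper erases the function's type and re-applies it through asInstanceOf: declaring the result as Int would misstate what a numeric add/subtract helper actually returns. A small standalone sketch of the same pattern, deliberately unrelated to Catalyst's real classes; all names here are invented.

object CastReturnTypeSketch {
  // The function is stored without its precise type and re-applied via a cast,
  // as in the evaluation helper. The cast must declare the real return type:
  // (Numeric[T], T, T) => T, not (Numeric[T], T, T) => Int.
  def applyNumeric[T](f: Any, num: Numeric[T], a: T, b: T): T =
    f.asInstanceOf[(Numeric[T], T, T) => T](num, a, b)

  def main(args: Array[String]): Unit = {
    val plus = (n: Numeric[Long], x: Long, y: Long) => n.plus(x, y)
    println(applyNumeric[Long](plus, implicitly[Numeric[Long]], 40L, 2L))   // 42
  }
}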
git commit: [SPARK-2342] Evaluation helper's output type doesn't conform to input ty...
Repository: spark Updated Branches: refs/heads/branch-1.0 fdee6ee06 - 7766c9d26 [SPARK-2342] Evaluation helper's output type doesn't conform to input ty... The function cast doesn't conform to the intention of Those expressions are supposed to be in the same data type, and also the return type. comment Author: Yijie Shen henry.yijies...@gmail.com Closes #1283 from yijieshen/master and squashes the following commits: c7aaa4b [Yijie Shen] [SPARK-2342] Evaluation helper's output type doesn't conform to input type (cherry picked from commit a9b52e5623f7fc77fca96b095f9eeaef76e35d54) Signed-off-by: Michael Armbrust mich...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7766c9d2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7766c9d2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7766c9d2 Branch: refs/heads/branch-1.0 Commit: 7766c9d26c489498b4ad4ff20868e4555990d7eb Parents: fdee6ee Author: Yijie Shen henry.yijies...@gmail.com Authored: Thu Jul 3 13:22:13 2014 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Thu Jul 3 13:22:24 2014 -0700 -- .../org/apache/spark/sql/catalyst/expressions/Expression.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7766c9d2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 0411ce3..ba62dab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -111,7 +111,7 @@ abstract class Expression extends TreeNode[Expression] { } else { e1.dataType match { case n: NumericType = -f.asInstanceOf[(Numeric[n.JvmType], n.JvmType, n.JvmType) = Int]( +f.asInstanceOf[(Numeric[n.JvmType], n.JvmType, n.JvmType) = n.JvmType]( n.numeric, evalE1.asInstanceOf[n.JvmType], evalE2.asInstanceOf[n.JvmType]) case other = sys.error(sType $other does not support numeric operations) }
git commit: [SPARK-2109] Setting SPARK_MEM for bin/pyspark does not work.
Repository: spark Updated Branches: refs/heads/master a9b52e562 -> 731f683b1 [SPARK-2109] Setting SPARK_MEM for bin/pyspark does not work. Trivial fix. Author: Prashant Sharma prashan...@imaginea.com Closes #1050 from ScrapCodes/SPARK-2109/pyspark-script-bug and squashes the following commits: 77072b9 [Prashant Sharma] Changed echos to redirect to STDERR. 13f48a0 [Prashant Sharma] [SPARK-2109] Setting SPARK_MEM for bin/pyspark does not work. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/731f683b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/731f683b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/731f683b Branch: refs/heads/master Commit: 731f683b1bd8abbb83030b6bae14876658bbf098 Parents: a9b52e5 Author: Prashant Sharma prashan...@imaginea.com Authored: Thu Jul 3 15:06:58 2014 -0700 Committer: Patrick Wendell pwend...@gmail.com Committed: Thu Jul 3 15:06:58 2014 -0700 -- bin/compute-classpath.sh | 8 bin/pyspark | 6 +++--- bin/run-example | 10 +- bin/spark-class | 13 ++--- 4 files changed, 18 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/731f683b/bin/compute-classpath.sh -- diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index 2cf4e38..e81e8c0 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -81,10 +81,10 @@ ASSEMBLY_JAR=$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null) # Verify that versions of java used to build the jars and run Spark are compatible jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1) if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then - echo "Loading Spark jar with '$JAR_CMD' failed." - echo "This is likely because Spark was compiled with Java 7 and run" - echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark" - echo "or build Spark with Java 6." + echo "Loading Spark jar with '$JAR_CMD' failed." 1>&2 + echo "This is likely because Spark was compiled with Java 7 and run" 1>&2 + echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark" 1>&2 + echo "or build Spark with Java 6." 1>&2 exit 1 fi http://git-wip-us.apache.org/repos/asf/spark/blob/731f683b/bin/pyspark -- diff --git a/bin/pyspark b/bin/pyspark index 0b5ed40..69b056f 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -26,7 +26,7 @@ export SPARK_HOME=$FWDIR SCALA_VERSION=2.10 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then - echo "Usage: ./bin/pyspark [options]" + echo "Usage: ./bin/pyspark [options]" 1>&2 "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2 exit 0 fi @@ -36,8 +36,8 @@ if [ ! -f "$FWDIR/RELEASE" ]; then # Exit if the user hasn't compiled Spark ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null if [[ $? != 0 ]]; then -echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2 -echo "You need to build Spark before running this program" >&2 +echo "Failed to find Spark assembly in $FWDIR/assembly/target" 1>&2 +echo "You need to build Spark before running this program" 1>&2 exit 1 fi fi http://git-wip-us.apache.org/repos/asf/spark/blob/731f683b/bin/run-example -- diff --git a/bin/run-example b/bin/run-example index e7a5fe3..942706d 100755 --- a/bin/run-example +++ b/bin/run-example @@ -27,9 +27,9 @@ if [ -n "$1" ]; then EXAMPLE_CLASS="$1" shift else - echo "Usage: ./bin/run-example <example-class> [example-args]" - echo "- set MASTER=XX to use a specific master" - echo "- can use abbreviated example class name (e.g.
SparkPi, mllib.LinearRegression)" + echo "Usage: ./bin/run-example <example-class> [example-args]" 1>&2 + echo "- set MASTER=XX to use a specific master" 1>&2 + echo "- can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)" 1>&2 exit 1 fi @@ -40,8 +40,8 @@ elif [ -e $EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.ja fi if [[ -z "$SPARK_EXAMPLES_JAR" ]]; then - echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" >&2 - echo "You need to build Spark before running this program" >&2 + echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2 + echo "You need to build Spark before running this program" 1>&2 exit 1 fi http://git-wip-us.apache.org/repos/asf/spark/blob/731f683b/bin/spark-class -- diff --git a/bin/spark-class b/bin/spark-class index 60d9657..04fa52c 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -33,13 +33,13 @@ export
git commit: [SPARK-2109] Setting SPARK_MEM for bin/pyspark does not work.
Repository: spark Updated Branches: refs/heads/branch-1.0-jdbc cf0d14b01 - 9f7cf5bdb [SPARK-2109] Setting SPARK_MEM for bin/pyspark does not work. Trivial fix. Author: Prashant Sharma prashan...@imaginea.com Closes #1050 from ScrapCodes/SPARK-2109/pyspark-script-bug and squashes the following commits: 77072b9 [Prashant Sharma] Changed echos to redirect to STDERR. 13f48a0 [Prashant Sharma] [SPARK-2109] Setting SPARK_MEM for bin/pyspark does not work. (cherry picked from commit 731f683b1bd8abbb83030b6bae14876658bbf098) Signed-off-by: Patrick Wendell pwend...@gmail.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9f7cf5bd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9f7cf5bd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9f7cf5bd Branch: refs/heads/branch-1.0-jdbc Commit: 9f7cf5bdb2a5118b9d6404078a42eeb4918a2ae5 Parents: cf0d14b Author: Prashant Sharma prashan...@imaginea.com Authored: Thu Jul 3 15:06:58 2014 -0700 Committer: Patrick Wendell pwend...@gmail.com Committed: Thu Jul 3 15:07:26 2014 -0700 -- bin/compute-classpath.sh | 8 bin/pyspark | 6 +++--- bin/run-example | 10 +- bin/spark-class | 13 ++--- 4 files changed, 18 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9f7cf5bd/bin/compute-classpath.sh -- diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index f94743e..d61d377 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -66,10 +66,10 @@ fi # Verify that versions of java used to build the jars and run Spark are compatible jar_error_check=$($JAR_CMD -tf $ASSEMBLY_JAR nonexistent/class/path 21) if [[ $jar_error_check =~ invalid CEN header ]]; then - echo Loading Spark jar with '$JAR_CMD' failed. - echo This is likely because Spark was compiled with Java 7 and run - echo with Java 6. (see SPARK-1703). Please use Java 7 to run Spark - echo or build Spark with Java 6. + echo Loading Spark jar with '$JAR_CMD' failed. 12 + echo This is likely because Spark was compiled with Java 7 and run 12 + echo with Java 6. (see SPARK-1703). Please use Java 7 to run Spark 12 + echo or build Spark with Java 6. 12 exit 1 fi http://git-wip-us.apache.org/repos/asf/spark/blob/9f7cf5bd/bin/pyspark -- diff --git a/bin/pyspark b/bin/pyspark index 114cbbc..acf3b27 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -26,7 +26,7 @@ export SPARK_HOME=$FWDIR SCALA_VERSION=2.10 if [[ $@ = *--help ]] || [[ $@ = *-h ]]; then - echo Usage: ./bin/pyspark [options] + echo Usage: ./bin/pyspark [options] 12 $FWDIR/bin/spark-submit --help 21 | grep -v Usage 12 exit 0 fi @@ -36,8 +36,8 @@ if [ ! -f $FWDIR/RELEASE ]; then # Exit if the user hasn't compiled Spark ls $FWDIR/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar /dev/null if [[ $? != 0 ]]; then -echo Failed to find Spark assembly in $FWDIR/assembly/target 2 -echo You need to build Spark before running this program 2 +echo Failed to find Spark assembly in $FWDIR/assembly/target 12 +echo You need to build Spark before running this program 12 exit 1 fi fi http://git-wip-us.apache.org/repos/asf/spark/blob/9f7cf5bd/bin/run-example -- diff --git a/bin/run-example b/bin/run-example index e7a5fe3..942706d 100755 --- a/bin/run-example +++ b/bin/run-example @@ -27,9 +27,9 @@ if [ -n $1 ]; then EXAMPLE_CLASS=$1 shift else - echo Usage: ./bin/run-example example-class [example-args] - echo - set MASTER=XX to use a specific master - echo - can use abbreviated example class name (e.g. 
SparkPi, mllib.LinearRegression) + echo Usage: ./bin/run-example example-class [example-args] 12 + echo - set MASTER=XX to use a specific master 12 + echo - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression) 12 exit 1 fi @@ -40,8 +40,8 @@ elif [ -e $EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.ja fi if [[ -z $SPARK_EXAMPLES_JAR ]]; then - echo Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target 2 - echo You need to build Spark before running this program 2 + echo Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target 12 + echo You need to build Spark before running this program 12 exit 1 fi http://git-wip-us.apache.org/repos/asf/spark/blob/9f7cf5bd/bin/spark-class -- diff --git a/bin/spark-class b/bin/spark-class index e884511..14d4a03 100755 ---
git commit: [SPARK-2109] Setting SPARK_MEM for bin/pyspark does not work.
Repository: spark Updated Branches: refs/heads/branch-1.0 7766c9d26 - 1d3616579 [SPARK-2109] Setting SPARK_MEM for bin/pyspark does not work. Trivial fix. Author: Prashant Sharma prashan...@imaginea.com Closes #1050 from ScrapCodes/SPARK-2109/pyspark-script-bug and squashes the following commits: 77072b9 [Prashant Sharma] Changed echos to redirect to STDERR. 13f48a0 [Prashant Sharma] [SPARK-2109] Setting SPARK_MEM for bin/pyspark does not work. (cherry picked from commit 731f683b1bd8abbb83030b6bae14876658bbf098) Signed-off-by: Patrick Wendell pwend...@gmail.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1d361657 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1d361657 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1d361657 Branch: refs/heads/branch-1.0 Commit: 1d36165796b656bb90409e9e254e27b7e72d36d0 Parents: 7766c9d Author: Prashant Sharma prashan...@imaginea.com Authored: Thu Jul 3 15:06:58 2014 -0700 Committer: Patrick Wendell pwend...@gmail.com Committed: Thu Jul 3 15:09:11 2014 -0700 -- bin/compute-classpath.sh | 8 bin/pyspark | 6 +++--- bin/run-example | 10 +- bin/spark-class | 13 ++--- 4 files changed, 18 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1d361657/bin/compute-classpath.sh -- diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index 7df43a5..067850f 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -65,10 +65,10 @@ fi # Verify that versions of java used to build the jars and run Spark are compatible jar_error_check=$($JAR_CMD -tf $ASSEMBLY_JAR nonexistent/class/path 21) if [[ $jar_error_check =~ invalid CEN header ]]; then - echo Loading Spark jar with '$JAR_CMD' failed. - echo This is likely because Spark was compiled with Java 7 and run - echo with Java 6. (see SPARK-1703). Please use Java 7 to run Spark - echo or build Spark with Java 6. + echo Loading Spark jar with '$JAR_CMD' failed. 12 + echo This is likely because Spark was compiled with Java 7 and run 12 + echo with Java 6. (see SPARK-1703). Please use Java 7 to run Spark 12 + echo or build Spark with Java 6. 12 exit 1 fi http://git-wip-us.apache.org/repos/asf/spark/blob/1d361657/bin/pyspark -- diff --git a/bin/pyspark b/bin/pyspark index 114cbbc..acf3b27 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -26,7 +26,7 @@ export SPARK_HOME=$FWDIR SCALA_VERSION=2.10 if [[ $@ = *--help ]] || [[ $@ = *-h ]]; then - echo Usage: ./bin/pyspark [options] + echo Usage: ./bin/pyspark [options] 12 $FWDIR/bin/spark-submit --help 21 | grep -v Usage 12 exit 0 fi @@ -36,8 +36,8 @@ if [ ! -f $FWDIR/RELEASE ]; then # Exit if the user hasn't compiled Spark ls $FWDIR/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar /dev/null if [[ $? != 0 ]]; then -echo Failed to find Spark assembly in $FWDIR/assembly/target 2 -echo You need to build Spark before running this program 2 +echo Failed to find Spark assembly in $FWDIR/assembly/target 12 +echo You need to build Spark before running this program 12 exit 1 fi fi http://git-wip-us.apache.org/repos/asf/spark/blob/1d361657/bin/run-example -- diff --git a/bin/run-example b/bin/run-example index e7a5fe3..942706d 100755 --- a/bin/run-example +++ b/bin/run-example @@ -27,9 +27,9 @@ if [ -n $1 ]; then EXAMPLE_CLASS=$1 shift else - echo Usage: ./bin/run-example example-class [example-args] - echo - set MASTER=XX to use a specific master - echo - can use abbreviated example class name (e.g. 
SparkPi, mllib.LinearRegression) + echo Usage: ./bin/run-example example-class [example-args] 12 + echo - set MASTER=XX to use a specific master 12 + echo - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression) 12 exit 1 fi @@ -40,8 +40,8 @@ elif [ -e $EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.ja fi if [[ -z $SPARK_EXAMPLES_JAR ]]; then - echo Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target 2 - echo You need to build Spark before running this program 2 + echo Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target 12 + echo You need to build Spark before running this program 12 exit 1 fi http://git-wip-us.apache.org/repos/asf/spark/blob/1d361657/bin/spark-class -- diff --git a/bin/spark-class b/bin/spark-class index e884511..14d4a03 100755 ---
git commit: [HOTFIX] Synchronize on SQLContext.settings in tests.
Repository: spark Updated Branches: refs/heads/master 731f683b1 -> d4c30cd99 [HOTFIX] Synchronize on SQLContext.settings in tests. Let's see if this fixes the ongoing series of test failures in a master build machine (https://amplab.cs.berkeley.edu/jenkins/job/Spark-Master-SBT-pre-YARN/SPARK_HADOOP_VERSION=1.0.4,label=centos/81/). pwendell marmbrus Author: Zongheng Yang zonghen...@gmail.com Closes #1277 from concretevitamin/test-fix and squashes the following commits: 28c88bd [Zongheng Yang] Synchronize on SQLContext.settings in tests. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d4c30cd9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d4c30cd9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d4c30cd9 Branch: refs/heads/master Commit: d4c30cd9918e18dde2a52909e36eaef6eb5996ab Parents: 731f683 Author: Zongheng Yang zonghen...@gmail.com Authored: Thu Jul 3 17:37:53 2014 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Thu Jul 3 17:37:53 2014 -0700 -- .../scala/org/apache/spark/sql/SQLConf.scala| 2 +- .../scala/org/apache/spark/sql/JoinSuite.scala | 40 ++-- .../org/apache/spark/sql/SQLConfSuite.scala | 64 +- .../org/apache/spark/sql/SQLQuerySuite.scala| 68 ++-- 4 files changed, 91 insertions(+), 83 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d4c30cd9/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala index 2fe7f94..3b5abab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala @@ -50,7 +50,7 @@ trait SQLConf { /** ** SQLConf functionality methods */ @transient - private val settings = java.util.Collections.synchronizedMap( + protected[sql] val settings = java.util.Collections.synchronizedMap( new java.util.HashMap[String, String]()) def set(props: Properties): Unit = { http://git-wip-us.apache.org/repos/asf/spark/blob/d4c30cd9/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 3d7d5ee..054b14f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -39,25 +39,27 @@ class JoinSuite extends QueryTest { test("plans broadcast hash join, given hints") { def mkTest(buildSide: BuildSide, leftTable: String, rightTable: String) = { - TestSQLContext.set("spark.sql.join.broadcastTables", -s"${if (buildSide == BuildRight) rightTable else leftTable}") - val rdd = sql(s"SELECT * FROM $leftTable JOIN $rightTable ON key = a") - // Using `sparkPlan` because for relevant patterns in HashJoin to be - // matched, other strategies need to be applied.
- val physical = rdd.queryExecution.sparkPlan - val bhj = physical.collect { case j: BroadcastHashJoin if j.buildSide == buildSide => j } - - assert(bhj.size === 1, "planner does not pick up hint to generate broadcast hash join") - checkAnswer( -rdd, -Seq( - (1, 1, 1, 1), - (1, 1, 1, 2), - (2, 2, 2, 1), - (2, 2, 2, 2), - (3, 3, 3, 1), - (3, 3, 3, 2) -)) + TestSQLContext.settings.synchronized { +TestSQLContext.set("spark.sql.join.broadcastTables", + s"${if (buildSide == BuildRight) rightTable else leftTable}") +val rdd = sql(s"SELECT * FROM $leftTable JOIN $rightTable ON key = a") +// Using `sparkPlan` because for relevant patterns in HashJoin to be +// matched, other strategies need to be applied. +val physical = rdd.queryExecution.sparkPlan +val bhj = physical.collect { case j: BroadcastHashJoin if j.buildSide == buildSide => j} + +assert(bhj.size === 1, "planner does not pick up hint to generate broadcast hash join") +checkAnswer( + rdd, + Seq( +(1, 1, 1, 1), +(1, 1, 1, 2), +(2, 2, 2, 1), +(2, 2, 2, 2), +(3, 3, 3, 1), +(3, 3, 3, 2) + )) + } } mkTest(BuildRight, "testData", "testData2") http://git-wip-us.apache.org/repos/asf/spark/blob/d4c30cd9/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala -- diff --git
git commit: Streaming programming guide typos
Repository: spark Updated Branches: refs/heads/master d4c30cd99 -> fdc4c112e Streaming programming guide typos Fix a bad Java code sample and a broken link in the streaming programming guide. Author: Clément MATHIEU clem...@unportant.info Closes #1286 from cykl/streaming-programming-guide-typos and squashes the following commits: b0908cb [Clément MATHIEU] Fix broken URL 9d3c535 [Clément MATHIEU] Spark streaming requires at least two working threads (scala version was OK) Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fdc4c112 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fdc4c112 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fdc4c112 Branch: refs/heads/master Commit: fdc4c112e7c2ac585d108d03209a642aa8bab7c8 Parents: d4c30cd Author: Clément MATHIEU clem...@unportant.info Authored: Thu Jul 3 18:31:18 2014 -0700 Committer: Tathagata Das tathagata.das1...@gmail.com Committed: Thu Jul 3 18:31:18 2014 -0700 -- docs/streaming-programming-guide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fdc4c112/docs/streaming-programming-guide.md -- diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md index ce8e58d..90a0eef 100644 --- a/docs/streaming-programming-guide.md +++ b/docs/streaming-programming-guide.md @@ -148,7 +148,7 @@ import org.apache.spark.streaming.*; import org.apache.spark.streaming.api.java.*; import scala.Tuple2; // Create a StreamingContext with a local master -JavaStreamingContext jssc = new JavaStreamingContext("local", "JavaNetworkWordCount", new Duration(1000)) +JavaStreamingContext jssc = new JavaStreamingContext("local[2]", "JavaNetworkWordCount", new Duration(1000)) {% endhighlight %} Using this context, we then create a new DStream @@ -216,7 +216,7 @@ jssc.awaitTermination(); // Wait for the computation to terminate {% endhighlight %} The complete code can be found in the Spark Streaming example -[JavaNetworkWordCount]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/java/index.html?org/apache/spark/examples/streaming/JavaNetworkWordCount.java). +[JavaNetworkWordCount]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/java/org/apache/spark/examples/streaming/JavaNetworkWordCount.java). br /div
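The substantive point of the sample fix is that a streaming program run locally needs at least two threads — one to receive data and one to process it — hence local[2] rather than local. A minimal Scala equivalent of the corrected example; the socket host and port are placeholders, not values from the guide.

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.StreamingContext._

object NetworkWordCountSketch {
  def main(args: Array[String]): Unit = {
    // "local[2]" (not "local"): one thread runs the socket receiver,
    // the other actually processes the received batches.
    val conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCountSketch")
    val ssc = new StreamingContext(conf, Seconds(1))

    val lines = ssc.socketTextStream("localhost", 9999)   // placeholder host/port
    val counts = lines.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
    counts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}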
git commit: [SPARK-1097] Workaround Hadoop conf ConcurrentModification issue
Repository: spark Updated Branches: refs/heads/master fdc4c112e -> 5fa0a0576 [SPARK-1097] Workaround Hadoop conf ConcurrentModification issue Workaround Hadoop conf ConcurrentModification issue Author: Raymond Liu raymond@intel.com Closes #1273 from colorant/hadoopRDD and squashes the following commits: 994e98b [Raymond Liu] Address comments e2cda3d [Raymond Liu] Workaround Hadoop conf ConcurrentModification issue Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5fa0a057 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5fa0a057 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5fa0a057 Branch: refs/heads/master Commit: 5fa0a05763ab1d527efe20e3b10539ac5ffc36de Parents: fdc4c11 Author: Raymond Liu raymond@intel.com Authored: Thu Jul 3 19:24:22 2014 -0700 Committer: Aaron Davidson aa...@databricks.com Committed: Thu Jul 3 19:24:22 2014 -0700 -- core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5fa0a057/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala -- diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index 98dcbf4..0410285 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -141,8 +141,8 @@ class HadoopRDD[K, V]( // local process. The local cache is accessed through HadoopRDD.putCachedMetadata(). // The caching helps minimize GC, since a JobConf can contain ~10KB of temporary objects. // synchronize to prevent ConcurrentModificationException (Spark-1097, Hadoop-10456) - broadcastedConf.synchronized { -val newJobConf = new JobConf(broadcastedConf.value.value) + conf.synchronized { +val newJobConf = new JobConf(conf) initLocalJobConfFuncOpt.map(f => f(newJobConf)) HadoopRDD.putCachedMetadata(jobConfCacheKey, newJobConf) newJobConf
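The workaround serializes access to the shared Hadoop Configuration while cloning it into a JobConf, because the copy iterates over the configuration's internal property map and a concurrent writer can trigger a ConcurrentModificationException. A hedged sketch of the locking pattern follows; the conf object here is a plain local Configuration, not Spark's broadcast configuration.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapred.JobConf

object JobConfCloneSketch {
  // Shared, possibly concurrently-modified Hadoop configuration.
  val sharedConf = new Configuration()

  // Cloning copies every entry of sharedConf; doing it under the conf's own
  // monitor keeps a concurrent writer from mutating the map mid-iteration
  // (the HADOOP-10456 / SPARK-1097 failure mode).
  def newJobConf(): JobConf = sharedConf.synchronized {
    new JobConf(sharedConf)
  }

  def main(args: Array[String]): Unit = {
    sharedConf.set("example.key", "example.value")   // illustrative key/value
    println(newJobConf().get("example.key"))
  }
}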
git commit: [SPARK-2350] Don't NPE while launching drivers
Repository: spark Updated Branches: refs/heads/branch-1.0 d2f253467 - 27a2afed6 [SPARK-2350] Don't NPE while launching drivers Prior to this change, we could throw a NPE if we launch a driver while another one is waiting, because removing from an iterator while iterating over it is not safe. Author: Aaron Davidson aa...@databricks.com Closes #1289 from aarondav/master-fail and squashes the following commits: 1cf1cf4 [Aaron Davidson] SPARK-2350: Don't NPE while launching drivers (cherry picked from commit 586feb5c9528042420f678f78bacb6c254a5eaf8) Signed-off-by: Patrick Wendell pwend...@gmail.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/27a2afed Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/27a2afed Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/27a2afed Branch: refs/heads/branch-1.0 Commit: 27a2afed60035474a100436798c8048506addb54 Parents: d2f2534 Author: Aaron Davidson aa...@databricks.com Authored: Thu Jul 3 22:31:41 2014 -0700 Committer: Patrick Wendell pwend...@gmail.com Committed: Thu Jul 3 22:32:05 2014 -0700 -- core/src/main/scala/org/apache/spark/deploy/master/Master.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/27a2afed/core/src/main/scala/org/apache/spark/deploy/master/Master.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 33ffcbd..4ed4827 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -481,7 +481,7 @@ private[spark] class Master( // First schedule drivers, they take strict precedence over applications val shuffledWorkers = Random.shuffle(workers) // Randomization helps balance drivers for (worker - shuffledWorkers if worker.state == WorkerState.ALIVE) { - for (driver - waitingDrivers) { + for (driver - List(waitingDrivers: _*)) { // iterate over a copy of waitingDrivers if (worker.memoryFree = driver.desc.mem worker.coresFree = driver.desc.cores) { launchDriver(worker, driver) waitingDrivers -= driver
git commit: [SPARK-2350] Don't NPE while launching drivers
Repository: spark Updated Branches: refs/heads/master 5fa0a0576 -> 586feb5c9 [SPARK-2350] Don't NPE while launching drivers Prior to this change, we could throw an NPE if we launch a driver while another one is waiting, because removing from an iterator while iterating over it is not safe. Author: Aaron Davidson aa...@databricks.com Closes #1289 from aarondav/master-fail and squashes the following commits: 1cf1cf4 [Aaron Davidson] SPARK-2350: Don't NPE while launching drivers Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/586feb5c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/586feb5c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/586feb5c Branch: refs/heads/master Commit: 586feb5c9528042420f678f78bacb6c254a5eaf8 Parents: 5fa0a05 Author: Aaron Davidson aa...@databricks.com Authored: Thu Jul 3 22:31:41 2014 -0700 Committer: Patrick Wendell pwend...@gmail.com Committed: Thu Jul 3 22:31:41 2014 -0700 -- core/src/main/scala/org/apache/spark/deploy/master/Master.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/586feb5c/core/src/main/scala/org/apache/spark/deploy/master/Master.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 11545b8..a304102 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -481,7 +481,7 @@ private[spark] class Master( // First schedule drivers, they take strict precedence over applications val shuffledWorkers = Random.shuffle(workers) // Randomization helps balance drivers for (worker <- shuffledWorkers if worker.state == WorkerState.ALIVE) { - for (driver <- waitingDrivers) { + for (driver <- List(waitingDrivers: _*)) { // iterate over a copy of waitingDrivers if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) { launchDriver(worker, driver) waitingDrivers -= driver
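The underlying rule is that mutating a Scala Buffer while a for-comprehension is iterating over it is unsafe; snapshotting it with List(buffer: _*) and removing from the original is the minimal fix this patch applies. A self-contained sketch of the pattern, using a toy waiting list in place of Spark's driver queue.

import scala.collection.mutable.ArrayBuffer

object IterateOverCopySketch {
  case class Driver(id: Int, mem: Int)

  def main(args: Array[String]): Unit = {
    val waiting = ArrayBuffer(Driver(1, 512), Driver(2, 4096), Driver(3, 256))
    val freeMem = 1024

    // Unsafe: `for (d <- waiting) { ...; waiting -= d }` removes from the buffer
    // being iterated and can skip elements or fail at runtime.
    // Safe: iterate over an immutable snapshot, mutate the original buffer.
    for (d <- List(waiting: _*)) {
      if (d.mem <= freeMem) {
        println(s"launching driver ${d.id}")
        waiting -= d
      }
    }
    println(s"still waiting: ${waiting.map(_.id).mkString(", ")}")
  }
}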
git commit: [SPARK-2307][Reprise] Correctly report RDD blocks on SparkUI
Repository: spark Updated Branches: refs/heads/branch-1.0 27a2afed6 - cf1d46e46 [SPARK-2307][Reprise] Correctly report RDD blocks on SparkUI **Problem.** The existing code in `ExecutorPage.scala` requires a linear scan through all the blocks to filter out the uncached ones. Every refresh could be expensive if there are many blocks and many executors. **Solution.** The proper semantics should be the following: `StorageStatusListener` should contain only block statuses that are cached. This means as soon as a block is unpersisted by any mean, its status should be removed. This is reflected in the changes made in `StorageStatusListener.scala`. Further, the `StorageTab` must stop relying on the `StorageStatusListener` changing a dropped block's status to `StorageLevel.NONE` (which no longer happens). This is reflected in the changes made in `StorageTab.scala` and `StorageUtils.scala`. -- If you have been following this chain of PRs like pwendell, you will quickly notice that this reverts the changes in #1249, which reverts the changes in #1080. In other words, we are adding back the changes from #1080, and fixing SPARK-2307 on top of those changes. Please ask questions if you are confused. Author: Andrew Or andrewo...@gmail.com Closes #1255 from andrewor14/storage-ui-fix-reprise and squashes the following commits: 45416fa [Andrew Or] Merge branch 'master' of github.com:apache/spark into storage-ui-fix-reprise a82ea25 [Andrew Or] Add tests for StorageStatusListener 8773b01 [Andrew Or] Update comment / minor changes 3afde3f [Andrew Or] Correctly report the number of blocks on SparkUI (cherry picked from commit 3894a49be9b532cc026d908a0f49bca850504498) Signed-off-by: Patrick Wendell pwend...@gmail.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cf1d46e4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cf1d46e4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cf1d46e4 Branch: refs/heads/branch-1.0 Commit: cf1d46e46518c818d20f07cdaabbd8069d877ca8 Parents: 27a2afe Author: Andrew Or andrewo...@gmail.com Authored: Thu Jul 3 22:48:23 2014 -0700 Committer: Patrick Wendell pwend...@gmail.com Committed: Thu Jul 3 22:48:33 2014 -0700 -- .../spark/storage/StorageStatusListener.scala | 17 ++- .../org/apache/spark/storage/StorageUtils.scala | 15 +- .../apache/spark/ui/exec/ExecutorsPage.scala| 4 +- .../org/apache/spark/ui/exec/ExecutorsTab.scala | 4 +- .../apache/spark/ui/storage/StorageTab.scala| 15 +- .../storage/StorageStatusListenerSuite.scala| 152 +++ 6 files changed, 184 insertions(+), 23 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cf1d46e4/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala -- diff --git a/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala b/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala index a6e6627..41c960c 100644 --- a/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala @@ -28,26 +28,31 @@ import org.apache.spark.scheduler._ */ @DeveloperApi class StorageStatusListener extends SparkListener { - private val executorIdToStorageStatus = mutable.Map[String, StorageStatus]() + // This maintains only blocks that are cached (i.e. 
storage level is not StorageLevel.NONE) + private[storage] val executorIdToStorageStatus = mutable.Map[String, StorageStatus]() def storageStatusList = executorIdToStorageStatus.values.toSeq /** Update storage status list to reflect updated block statuses */ - def updateStorageStatus(execId: String, updatedBlocks: Seq[(BlockId, BlockStatus)]) { -val filteredStatus = storageStatusList.find(_.blockManagerId.executorId == execId) + private def updateStorageStatus(execId: String, updatedBlocks: Seq[(BlockId, BlockStatus)]) { +val filteredStatus = executorIdToStorageStatus.get(execId) filteredStatus.foreach { storageStatus => updatedBlocks.foreach { case (blockId, updatedStatus) => -storageStatus.blocks(blockId) = updatedStatus +if (updatedStatus.storageLevel == StorageLevel.NONE) { + storageStatus.blocks.remove(blockId) +} else { + storageStatus.blocks(blockId) = updatedStatus +} } } } /** Update storage status list to reflect the removal of an RDD from the cache */ - def updateStorageStatus(unpersistedRDDId: Int) { + private def updateStorageStatus(unpersistedRDDId: Int) { storageStatusList.foreach { storageStatus => val unpersistedBlocksIds = storageStatus.rddBlocks.keys.filter(_.rddId ==
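The semantic change described above — keep only cached blocks in the listener, dropping a block's entry as soon as its storage level goes to NONE — reduces to a remove-or-update on a per-executor map, so the UI no longer needs a linear scan to filter out uncached blocks. A simplified sketch with plain Scala maps standing in for the listener's state; the block and status types are toy stand-ins, not Spark's.

import scala.collection.mutable

object StorageStatusSketch {
  // Toy stand-ins for BlockId / BlockStatus / StorageLevel.
  case class BlockStatus(storageLevel: String, memSize: Long)
  val NONE = "NONE"

  // executor id -> (block id -> status); only cached blocks should survive here.
  val executorIdToBlocks = mutable.Map[String, mutable.Map[String, BlockStatus]]()

  def updateStorageStatus(execId: String, updatedBlocks: Seq[(String, BlockStatus)]): Unit = {
    val blocks = executorIdToBlocks.getOrElseUpdate(execId, mutable.Map.empty)
    updatedBlocks.foreach { case (blockId, status) =>
      if (status.storageLevel == NONE) blocks.remove(blockId)   // dropped => forget it
      else blocks(blockId) = status                             // cached => track it
    }
  }

  def main(args: Array[String]): Unit = {
    updateStorageStatus("exec-1", Seq("rdd_0_0" -> BlockStatus("MEMORY_ONLY", 1024)))
    updateStorageStatus("exec-1", Seq("rdd_0_0" -> BlockStatus(NONE, 0)))
    println(executorIdToBlocks("exec-1").size)   // 0 -- the dropped block is gone
  }
}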