spark git commit: [SPARK-5141][SQL]CaseInsensitiveMap throws java.io.NotSerializableException
Repository: spark
Updated Branches:
  refs/heads/master 4554529dc -> 545dfcb92

[SPARK-5141][SQL] CaseInsensitiveMap throws java.io.NotSerializableException

CaseInsensitiveMap throws java.io.NotSerializableException.

Author: luogankun <luogan...@gmail.com>

Closes #3944 from luogankun/SPARK-5141 and squashes the following commits:

b6d63d5 [luogankun] [SPARK-5141] CaseInsensitiveMap throws java.io.NotSerializableException

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/545dfcb9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/545dfcb9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/545dfcb9

Branch: refs/heads/master
Commit: 545dfcb92f2ff82a51877d35d9669094ea81f466
Parents: 4554529
Author: luogankun <luogan...@gmail.com>
Authored: Fri Jan 9 20:38:41 2015 -0800
Committer: Reynold Xin <r...@databricks.com>
Committed: Fri Jan 9 20:38:41 2015 -0800

----------------------------------------------------------------------
 sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/545dfcb9/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala
index 8a66ac3..364bace 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala
@@ -110,7 +110,8 @@ private[sql] case class CreateTableUsing(
 /**
  * Builds a map in which keys are case insensitive
  */
-protected class CaseInsensitiveMap(map: Map[String, String]) extends Map[String, String] {
+protected class CaseInsensitiveMap(map: Map[String, String]) extends Map[String, String]
+  with Serializable {
 
   val baseMap = map.map(kv => kv.copy(_1 = kv._1.toLowerCase))
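For context, a minimal hedged sketch (hypothetical classes, not Spark code) of the failure mode this commit fixes: an object reachable from a shipped closure must survive a Java-serialization round trip, and mixing in Serializable is what makes that round trip succeed.

import java.io._

// Hypothetical wrapper mirroring the CaseInsensitiveMap idea; the
// Serializable mixin is the analogue of what the commit above adds.
class LowerCaseMap(underlying: Map[String, String]) extends Serializable {
  val baseMap: Map[String, String] = underlying.map(kv => kv.copy(_1 = kv._1.toLowerCase))
  def get(key: String): Option[String] = baseMap.get(key.toLowerCase)
}

// Round-trip an object through Java serialization, as Spark does when
// shipping objects to executors; this throws java.io.NotSerializableException
// if anything reachable from obj is not Serializable.
def roundTrip(obj: AnyRef): AnyRef = {
  val buf = new ByteArrayOutputStream()
  val out = new ObjectOutputStream(buf)
  out.writeObject(obj)
  out.close()
  new ObjectInputStream(new ByteArrayInputStream(buf.toByteArray)).readObject()
}

roundTrip(new LowerCaseMap(Map("Path" -> "/tmp")))  // succeeds with the mixin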
spark git commit: [SPARK-4990][Deploy]to find default properties file, search SPARK_CONF_DIR first
Repository: spark
Updated Branches:
  refs/heads/master 4e1f12d99 -> 8782eb992

[SPARK-4990][Deploy] to find default properties file, search SPARK_CONF_DIR first

https://issues.apache.org/jira/browse/SPARK-4990

Author: WangTaoTheTonic <barneystin...@aliyun.com>
Author: WangTao <barneystin...@aliyun.com>

Closes #3823 from WangTaoTheTonic/SPARK-4990 and squashes the following commits:

133c43e [WangTao] Update spark-submit2.cmd
b1ab402 [WangTao] Update spark-submit
4cc7f34 [WangTaoTheTonic] rebase
55300bc [WangTaoTheTonic] use export to make it global
d8d3cb7 [WangTaoTheTonic] remove blank line
07b9ebf [WangTaoTheTonic] check SPARK_CONF_DIR instead of checking properties file
c5a85eb [WangTaoTheTonic] to find default properties file, search SPARK_CONF_DIR first

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8782eb99
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8782eb99
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8782eb99

Branch: refs/heads/master
Commit: 8782eb992f461502238c41ece3a3002efa67a792
Parents: 4e1f12d
Author: WangTaoTheTonic <barneystin...@aliyun.com>
Authored: Fri Jan 9 17:10:02 2015 -0800
Committer: Andrew Or <and...@databricks.com>
Committed: Fri Jan 9 17:10:02 2015 -0800

----------------------------------------------------------------------
 bin/spark-submit      | 5 ++++-
 bin/spark-submit2.cmd | 6 +++++-
 2 files changed, 9 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/8782eb99/bin/spark-submit

diff --git a/bin/spark-submit b/bin/spark-submit
index aefd38a..3e5cbdb 100755
--- a/bin/spark-submit
+++ b/bin/spark-submit
@@ -44,7 +44,10 @@ while (($#)); do
   shift
 done
 
-DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf"
+if [ -z "$SPARK_CONF_DIR" ]; then
+  export SPARK_CONF_DIR="$SPARK_HOME/conf"
+fi
+DEFAULT_PROPERTIES_FILE="$SPARK_CONF_DIR/spark-defaults.conf"
 if [ "$MASTER" == "yarn-cluster" ]; then
   SPARK_SUBMIT_DEPLOY_MODE=cluster
 fi

http://git-wip-us.apache.org/repos/asf/spark/blob/8782eb99/bin/spark-submit2.cmd

diff --git a/bin/spark-submit2.cmd b/bin/spark-submit2.cmd
index daf0284..12244a9 100644
--- a/bin/spark-submit2.cmd
+++ b/bin/spark-submit2.cmd
@@ -24,7 +24,11 @@ set ORIG_ARGS=%*
 
 rem Reset the values of all variables used
 set SPARK_SUBMIT_DEPLOY_MODE=client
-set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_HOME%\conf\spark-defaults.conf
+
+if not defined %SPARK_CONF_DIR% (
+  set SPARK_CONF_DIR=%SPARK_HOME%\conf
+)
+set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_CONF_DIR%\spark-defaults.conf
 set SPARK_SUBMIT_DRIVER_MEMORY=
 set SPARK_SUBMIT_LIBRARY_PATH=
 set SPARK_SUBMIT_CLASSPATH=
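The lookup order the scripts now follow can be summarized in a small Scala sketch (a hypothetical helper, not Spark code): SPARK_CONF_DIR wins when set, otherwise fall back to $SPARK_HOME/conf.

// Hedged sketch of the resolution order, assuming only standard env vars.
def defaultPropertiesFile(env: Map[String, String] = sys.env): String = {
  val confDir = env.getOrElse("SPARK_CONF_DIR", env.getOrElse("SPARK_HOME", ".") + "/conf")
  confDir + "/spark-defaults.conf"
}

defaultPropertiesFile(Map("SPARK_CONF_DIR" -> "/etc/spark"))
// => "/etc/spark/spark-defaults.conf"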
spark git commit: [SPARK-4406] [MLib] FIX: Validate k in SVD
Repository: spark
Updated Branches:
  refs/heads/master 8782eb992 -> 4554529dc

[SPARK-4406] [MLib] FIX: Validate k in SVD

Raise an exception when k is non-positive in SVD.

Author: MechCoder <manojkumarsivaraj...@gmail.com>

Closes #3945 from MechCoder/spark-4406 and squashes the following commits:

64e6d2d [MechCoder] TST: Add better test errors and messages
12dae73 [MechCoder] [SPARK-4406] FIX: Validate k in SVD

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4554529d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4554529d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4554529d

Branch: refs/heads/master
Commit: 4554529dce8fe8ca937d887109ef072eef52bf51
Parents: 8782eb9
Author: MechCoder <manojkumarsivaraj...@gmail.com>
Authored: Fri Jan 9 17:45:18 2015 -0800
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Fri Jan 9 17:45:18 2015 -0800

----------------------------------------------------------------------
 .../spark/mllib/linalg/distributed/IndexedRowMatrix.scala      | 3 +++
 .../apache/spark/mllib/linalg/distributed/RowMatrix.scala      | 2 +-
 .../mllib/linalg/distributed/IndexedRowMatrixSuite.scala       | 7 +++++++
 .../spark/mllib/linalg/distributed/RowMatrixSuite.scala        | 8 ++++++++
 4 files changed, 19 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/4554529d/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
index 36d8cad..181f507 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
@@ -102,6 +102,9 @@ class IndexedRowMatrix(
       k: Int,
       computeU: Boolean = false,
       rCond: Double = 1e-9): SingularValueDecomposition[IndexedRowMatrix, Matrix] = {
+
+    val n = numCols().toInt
+    require(k > 0 && k <= n, s"Requested k singular values but got k=$k and numCols=$n.")
     val indices = rows.map(_.index)
     val svd = toRowMatrix().computeSVD(k, computeU, rCond)
     val U = if (computeU) {

http://git-wip-us.apache.org/repos/asf/spark/blob/4554529d/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index fbd35e3..d5abba6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -212,7 +212,7 @@ class RowMatrix(
       tol: Double,
       mode: String): SingularValueDecomposition[RowMatrix, Matrix] = {
     val n = numCols().toInt
-    require(k > 0 && k <= n, s"Request up to n singular values but got k=$k and n=$n.")
+    require(k > 0 && k <= n, s"Requested k singular values but got k=$k and numCols=$n.")
 
     object SVDMode extends Enumeration {
       val LocalARPACK, LocalLAPACK, DistARPACK = Value

http://git-wip-us.apache.org/repos/asf/spark/blob/4554529d/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
index e25bc02..741cd49 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
@@ -113,6 +113,13 @@ class IndexedRowMatrixSuite extends FunSuite with MLlibTestSparkContext {
     assert(closeToZero(U * brzDiag(s) * V.t - localA))
   }
 
+  test("validate k in svd") {
+    val A = new IndexedRowMatrix(indexedRows)
+    intercept[IllegalArgumentException] {
+      A.computeSVD(-1)
+    }
+  }
+
   def closeToZero(G: BDM[Double]): Boolean = {
     G.valuesIterator.map(math.abs).sum < 1e-6
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/4554529d/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala
index dbf55ff..3309713 100644
---
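The guard-plus-test pattern this commit applies can be sketched in isolation (hypothetical class, not the MLlib API): validate the argument eagerly with require, which throws IllegalArgumentException, and assert that failure in a test.

// Illustrative only; the decomposition itself is elided.
class TinySvd(numCols: Int) {
  def computeSVD(k: Int): Unit = {
    require(k > 0 && k <= numCols,
      s"Requested k singular values but got k=$k and numCols=$numCols.")
    // ... actual SVD work would go here ...
  }
}

// In a ScalaTest suite the failure is asserted the same way as above:
// intercept[IllegalArgumentException] { new TinySvd(3).computeSVD(-1) }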
spark git commit: [Spark-3490] Disable SparkUI for tests (backport into 0.9)
Repository: spark
Updated Branches:
  refs/heads/branch-0.9 7d007d352 -> 3fba7b7bc

[Spark-3490] Disable SparkUI for tests (backport into 0.9)

Branch-1.2 #2363 (original)
Branch-1.1 #2415
Branch-1.0 #3959
Branch-0.9 #3961 (this PR)

Author: Andrew Or <and...@databricks.com>

Closes #3961 from andrewor14/ui-ports-0.9 and squashes the following commits:

8644997 [Andrew Or] Disable UI for tests

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3fba7b7b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3fba7b7b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3fba7b7b

Branch: refs/heads/branch-0.9
Commit: 3fba7b7bc054648742ffbb76042aeb58ae9139c2
Parents: 7d007d3
Author: Andrew Or <and...@databricks.com>
Authored: Fri Jan 9 10:23:18 2015 -0800
Committer: Andrew Or <and...@databricks.com>
Committed: Fri Jan 9 10:23:18 2015 -0800

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/SparkContext.scala     | 14 ++
 .../scheduler/cluster/SimrSchedulerBackend.scala       |  5 +++--
 .../cluster/SparkDeploySchedulerBackend.scala          |  5 +++--
 pom.xml                                                |  4
 project/SparkBuild.scala                               |  1 +
 .../apache/spark/deploy/yarn/ApplicationMaster.scala   |  2 +-
 .../apache/spark/deploy/yarn/ApplicationMaster.scala   |  2 +-
 7 files changed, 23 insertions(+), 10 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/3fba7b7b/core/src/main/scala/org/apache/spark/SparkContext.scala

diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 566472e..1cd3703 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -155,8 +155,14 @@ class SparkContext(
     new MetadataCleaner(MetadataCleanerType.SPARK_CONTEXT, this.cleanup, conf)
 
   // Initialize the Spark UI
-  private[spark] val ui = new SparkUI(this)
-  ui.bind()
+  private[spark] val ui: Option[SparkUI] =
+    if (conf.getBoolean("spark.ui.enabled", true)) {
+      Some(new SparkUI(this))
+    } else {
+      // For tests, do not enable the UI
+      None
+    }
+  ui.foreach(_.bind())
 
   val startTime = System.currentTimeMillis()
 
@@ -202,7 +208,7 @@ class SparkContext(
   @volatile private[spark] var dagScheduler = new DAGScheduler(taskScheduler)
   dagScheduler.start()
 
-  ui.start()
+  ui.foreach(_.start())
 
   /** A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. */
   val hadoopConfiguration = {
@@ -777,7 +783,7 @@ class SparkContext(
   /** Shut down the SparkContext. */
   def stop() {
-    ui.stop()
+    ui.foreach(_.stop())
     // Do this only if not stopped already - best case effort.
     // prevent NPE if stopped more than once.
     val dagSchedulerCopy = dagScheduler

http://git-wip-us.apache.org/repos/asf/spark/blob/3fba7b7b/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala

diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala
index d99c761..149c9fd 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala
@@ -44,16 +44,17 @@ private[spark] class SimrSchedulerBackend(
     val conf = new Configuration()
     val fs = FileSystem.get(conf)
 
+    val appUIAddress = sc.ui.map(_.appUIAddress).getOrElse("")
     logInfo("Writing to HDFS file: " + driverFilePath)
     logInfo("Writing Akka address: " + driverUrl)
-    logInfo("Writing Spark UI Address: " + sc.ui.appUIAddress)
+    logInfo("Writing Spark UI Address: " + appUIAddress)
 
     // Create temporary file to prevent race condition where executors get empty driverUrl file
     val temp = fs.create(tmpPath, true)
     temp.writeUTF(driverUrl)
     temp.writeInt(maxCores)
-    temp.writeUTF(sc.ui.appUIAddress)
+    temp.writeUTF(appUIAddress)
     temp.close()
 
     // Atomic rename

http://git-wip-us.apache.org/repos/asf/spark/blob/3fba7b7b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala

diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
index faa6e1e..dfcb22a 100644
---
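A minimal sketch of the pattern the backport applies (illustrative names, not Spark's API): gate an optional subsystem behind a flag and thread Option through every call site, so callers never touch a half-initialized field or a null.

class Server(uiEnabled: Boolean) {
  // None when disabled; callers handle absence explicitly.
  val ui: Option[String] = if (uiEnabled) Some("http://host:4040") else None
  def start(): Unit = ui.foreach(addr => println(s"UI bound at $addr"))
  def appUIAddress: String = ui.getOrElse("")  // same getOrElse("") trick as the scheduler backends above
}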
spark git commit: [SPARK-5145][Mllib] Add BLAS.dsyr and use it in GaussianMixtureEM
Repository: spark
Updated Branches:
  refs/heads/master b6aa55730 -> e9ca16ec9

[SPARK-5145][Mllib] Add BLAS.dsyr and use it in GaussianMixtureEM

This PR uses BLAS.dsyr to replace a few implementations in GaussianMixtureEM.

Author: Liang-Chi Hsieh <vii...@gmail.com>

Closes #3949 from viirya/blas_dsyr and squashes the following commits:

4e4d6cf [Liang-Chi Hsieh] Add unit test. Rename function name, modify doc and style.
3f57fd2 [Liang-Chi Hsieh] Add BLAS.dsyr and use it in GaussianMixtureEM.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e9ca16ec
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e9ca16ec
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e9ca16ec

Branch: refs/heads/master
Commit: e9ca16ec943b9553056482d0c085eacb6046821e
Parents: b6aa557
Author: Liang-Chi Hsieh <vii...@gmail.com>
Authored: Fri Jan 9 10:27:33 2015 -0800
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Fri Jan 9 10:27:33 2015 -0800

----------------------------------------------------------------------
 .../mllib/clustering/GaussianMixtureEM.scala    | 10 +++--
 .../org/apache/spark/mllib/linalg/BLAS.scala    | 26 +
 .../apache/spark/mllib/linalg/BLASSuite.scala   | 41
 3 files changed, 73 insertions(+), 4 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/e9ca16ec/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureEM.scala

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureEM.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureEM.scala
index bdf984a..3a6c0e6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureEM.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureEM.scala
@@ -21,7 +21,7 @@ import scala.collection.mutable.IndexedSeq
 import breeze.linalg.{DenseVector => BreezeVector, DenseMatrix => BreezeMatrix, diag, Transpose}
 
 import org.apache.spark.rdd.RDD
-import org.apache.spark.mllib.linalg.{Matrices, Vector, Vectors}
+import org.apache.spark.mllib.linalg.{Matrices, Vector, Vectors, DenseVector, DenseMatrix, BLAS}
 import org.apache.spark.mllib.stat.impl.MultivariateGaussian
 import org.apache.spark.mllib.util.MLUtils
 
@@ -151,9 +151,10 @@ class GaussianMixtureEM private (
       var i = 0
       while (i < k) {
         val mu = sums.means(i) / sums.weights(i)
-        val sigma = sums.sigmas(i) / sums.weights(i) - mu * new Transpose(mu) // TODO: Use BLAS.dsyr
+        BLAS.syr(-sums.weights(i), Vectors.fromBreeze(mu).asInstanceOf[DenseVector],
+          Matrices.fromBreeze(sums.sigmas(i)).asInstanceOf[DenseMatrix])
         weights(i) = sums.weights(i) / sumWeights
-        gaussians(i) = new MultivariateGaussian(mu, sigma)
+        gaussians(i) = new MultivariateGaussian(mu, sums.sigmas(i) / sums.weights(i))
         i = i + 1
       }
 
@@ -211,7 +212,8 @@ private object ExpectationSum {
       p(i) /= pSum
       sums.weights(i) += p(i)
       sums.means(i) += x * p(i)
-      sums.sigmas(i) += xxt * p(i) // TODO: use BLAS.dsyr
+      BLAS.syr(p(i), Vectors.fromBreeze(x).asInstanceOf[DenseVector],
+        Matrices.fromBreeze(sums.sigmas(i)).asInstanceOf[DenseMatrix])
       i = i + 1
     }
     sums

http://git-wip-us.apache.org/repos/asf/spark/blob/e9ca16ec/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala
index 9fed513..3414dac 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala
@@ -228,6 +228,32 @@ private[spark] object BLAS extends Serializable with Logging {
     }
     _nativeBLAS
   }
+
+  /**
+   * A := alpha * x * x^T^ + A
+   * @param alpha a real scalar that will be multiplied to x * x^T^.
+   * @param x the vector x that contains the n elements.
+   * @param A the symmetric matrix A. Size of n x n.
+   */
+  def syr(alpha: Double, x: DenseVector, A: DenseMatrix) {
+    val mA = A.numRows
+    val nA = A.numCols
+    require(mA == nA, s"A is not a symmetric matrix. A: $mA x $nA")
+    require(mA == x.size, s"The size of x doesn't match the rank of A. A: $mA x $nA, x: ${x.size}")
+
+    nativeBLAS.dsyr("U", x.size, alpha, x.values, 1, A.values, nA)
+
+    // Fill lower triangular part of A
+    var i = 0
+    while (i < mA) {
+      var j = i + 1
+      while (j < nA) {
+        A(j, i) = A(i, j)
+        j += 1
+      }
+      i += 1
+    }
+  }
 
   /**
    * C := alpha * A * B + beta * C
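To make the dsyr contract concrete, here is a small self-contained check of the rank-1 update semantics (plain Scala arrays, no netlib dependency; illustrative only, not the MLlib implementation):

// A := alpha * x * x^T + A, computed naively on a dense column-major array.
def naiveSyr(alpha: Double, x: Array[Double], a: Array[Double]): Unit = {
  val n = x.length
  require(a.length == n * n, "A must be n x n")
  var i = 0
  while (i < n) {
    var j = 0
    while (j < n) {
      a(j + i * n) += alpha * x(j) * x(i)  // column-major entry A(j, i)
      j += 1
    }
    i += 1
  }
}

// Example: x = (1, 2), alpha = 0.5, A starting at zero yields
// A = [[0.5, 1.0], [1.0, 2.0]].
val a = Array.fill(4)(0.0)
naiveSyr(0.5, Array(1.0, 2.0), a)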
spark git commit: [Minor] Fix test RetryingBlockFetcherSuite after changed config name
Repository: spark
Updated Branches:
  refs/heads/master f3da4bd72 -> b4034c3f8

[Minor] Fix test RetryingBlockFetcherSuite after changed config name

Flakey due to the default retry interval being the same as our test's wait timeout.

Author: Aaron Davidson <aa...@databricks.com>

Closes #3972 from aarondav/fix-test and squashes the following commits:

db77cab [Aaron Davidson] [Minor] Fix test after changed config name

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b4034c3f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b4034c3f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b4034c3f

Branch: refs/heads/master
Commit: b4034c3f889bf24f60eb806802866b48e4cbe55c
Parents: f3da4bd
Author: Aaron Davidson <aa...@databricks.com>
Authored: Fri Jan 9 09:20:16 2015 -0800
Committer: Aaron Davidson <aa...@databricks.com>
Committed: Fri Jan 9 09:20:16 2015 -0800

----------------------------------------------------------------------
 .../apache/spark/network/shuffle/RetryingBlockFetcherSuite.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/b4034c3f/network/shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockFetcherSuite.java

diff --git a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockFetcherSuite.java b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockFetcherSuite.java
index 0191fe5..1ad0d72 100644
--- a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockFetcherSuite.java
+++ b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockFetcherSuite.java
@@ -54,13 +54,13 @@ public class RetryingBlockFetcherSuite {
   @Before
   public void beforeEach() {
     System.setProperty("spark.shuffle.io.maxRetries", "2");
-    System.setProperty("spark.shuffle.io.retryWaitMs", "0");
+    System.setProperty("spark.shuffle.io.retryWait", "0");
   }
 
   @After
   public void afterEach() {
     System.clearProperty("spark.shuffle.io.maxRetries");
-    System.clearProperty("spark.shuffle.io.retryWaitMs");
+    System.clearProperty("spark.shuffle.io.retryWait");
   }
 
   @Test
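The underlying failure mode is a test referencing a stale config key after a rename. A common guard, sketched in Scala with hypothetical names (not Spark's config machinery), is to define the key once so tests and production code cannot drift apart:

object ShuffleConf {
  // Single source of truth for the key names used by both sides.
  val RetryWait = "spark.shuffle.io.retryWait"
  val MaxRetries = "spark.shuffle.io.maxRetries"
}

// test setup:  System.setProperty(ShuffleConf.RetryWait, "0")
// production:  conf.getInt(ShuffleConf.RetryWait, 5)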
spark git commit: SPARK-5136 [DOCS] Improve documentation around setting up Spark IntelliJ project
Repository: spark
Updated Branches:
  refs/heads/master b4034c3f8 -> 547df9771

SPARK-5136 [DOCS] Improve documentation around setting up Spark IntelliJ project

This PR simply points to the IntelliJ wiki page instead of also including IntelliJ notes in the docs. The intent however is to also update the wiki page with updated tips. This is the text I propose for the IntelliJ section on the wiki. I realize it omits some of the existing instructions on the wiki, about enabling Hive, but I think those are actually optional.

--

IntelliJ supports both Maven- and SBT-based projects. It is recommended, however, to import Spark as a Maven project. Choose "Import Project..." from the File menu, and select the `pom.xml` file in the Spark root directory.

It is fine to leave all settings at their default values in the Maven import wizard, with two caveats. First, it is usually useful to enable "Import Maven projects automatically", since changes to the project structure will automatically update the IntelliJ project. Second, note the step that prompts you to choose active Maven build profiles. As documented above, some build configurations require specific profiles to be enabled. The same profiles that are enabled with `-P[profile name]` above may be enabled on this screen. For example, if developing for Hadoop 2.4 with YARN support, enable profiles `yarn` and `hadoop-2.4`. These selections can be changed later by accessing the "Maven Projects" tool window from the View menu, and expanding the Profiles section.

"Rebuild Project" can fail the first time the project is compiled, because generated source files are not automatically generated. Try clicking the "Generate Sources and Update Folders For All Projects" button in the "Maven Projects" tool window to manually generate these sources.

Compilation may fail with an error like "scalac: bad option: -P:/home/jakub/.m2/repository/org/scalamacros/paradise_2.10.4/2.0.1/paradise_2.10.4-2.0.1.jar". If so, go to Preferences > Build, Execution, Deployment > Scala Compiler and clear the "Additional compiler options" field. It will work then, although the option will come back when the project reimports.

--

Author: Sean Owen <so...@cloudera.com>

Closes #3952 from srowen/SPARK-5136 and squashes the following commits:

f3baa66 [Sean Owen] Point to new IJ / Eclipse wiki link
016b7df [Sean Owen] Point to IntelliJ wiki page instead of also including IntelliJ notes in the docs

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/547df977
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/547df977
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/547df977

Branch: refs/heads/master
Commit: 547df97715580f99ae573a49a86da12bf20cbc3d
Parents: b4034c3
Author: Sean Owen <so...@cloudera.com>
Authored: Fri Jan 9 09:35:46 2015 -0800
Committer: Patrick Wendell <pwend...@gmail.com>
Committed: Fri Jan 9 09:35:46 2015 -0800

----------------------------------------------------------------------
 docs/building-spark.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/547df977/docs/building-spark.md

diff --git a/docs/building-spark.md b/docs/building-spark.md
index c1bcd91..fb93017 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -151,9 +151,10 @@ Thus, the full flow for running continuous-compilation of the `core` submodule m
     $ mvn scala:cc
 ```
 
-# Using With IntelliJ IDEA
+# Building Spark with IntelliJ IDEA or Eclipse
 
-This setup works fine in IntelliJ IDEA 11.1.4. After opening the project via the pom.xml file in the project root folder, you only need to activate either the hadoop1 or hadoop2 profile in the Maven Properties popout. We have not tried Eclipse/Scala IDE with this.
+For help in setting up IntelliJ IDEA or Eclipse for Spark development, and troubleshooting, refer to the
+[wiki page for IDE setup](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-IDESetup).
 
 # Building Spark Debian Packages
spark git commit: HOTFIX: Minor improvements to make-distribution.sh
Repository: spark
Updated Branches:
  refs/heads/master 547df9771 -> 1790b3869

HOTFIX: Minor improvements to make-distribution.sh

1. Renames $FWDIR to $SPARK_HOME (vast majority of diff).
2. Use Spark-provided Maven.
3. Logs build flags in the RELEASE file.

Author: Patrick Wendell <pwend...@gmail.com>

Closes #3973 from pwendell/master and squashes the following commits:

340a2fa [Patrick Wendell] HOTFIX: Minor improvements to make-distribution.sh

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1790b386
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1790b386
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1790b386

Branch: refs/heads/master
Commit: 1790b38695b46400a24b0b7e278e8e8388748211
Parents: 547df97
Author: Patrick Wendell <pwend...@gmail.com>
Authored: Fri Jan 9 09:40:18 2015 -0800
Committer: Patrick Wendell <pwend...@gmail.com>
Committed: Fri Jan 9 09:40:18 2015 -0800

----------------------------------------------------------------------
 make-distribution.sh | 61 ++++++++++++++++++++++++++---------------------
 1 file changed, 34 insertions(+), 27 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/1790b386/make-distribution.sh

diff --git a/make-distribution.sh b/make-distribution.sh
index 45c99e4..4e2f400 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -28,18 +28,20 @@ set -o pipefail
 set -e
 
 # Figure out where the Spark framework is installed
-FWDIR="$(cd `dirname $0`; pwd)"
-DISTDIR="$FWDIR/dist"
+SPARK_HOME="$(cd `dirname $0`; pwd)"
+DISTDIR="$SPARK_HOME/dist"
 
 SPARK_TACHYON=false
 MAKE_TGZ=false
 NAME=none
+MVN="$SPARK_HOME/build/mvn"
 
 function exit_with_usage {
   echo "make-distribution.sh - tool for making binary distributions of Spark"
   echo ""
   echo "usage:"
-  echo "./make-distribution.sh [--name] [--tgz] [--with-tachyon] <maven build options>"
+  cl_options="[--name] [--tgz] [--mvn <mvn-command>] [--with-tachyon]"
+  echo "./make-distribution.sh $cl_options <maven build options>"
   echo "See Spark's \"Building Spark\" doc for correct Maven options."
   echo ""
   exit 1
@@ -71,6 +73,10 @@ while (( "$#" )); do
     --tgz)
       MAKE_TGZ=true
       ;;
+    --mvn)
+      MVN="$2"
+      shift
+      ;;
     --name)
       NAME="$2"
       shift
@@ -109,9 +115,9 @@ if which git &>/dev/null; then
     unset GITREV
 fi
 
-if ! which mvn &>/dev/null; then
-    echo -e "You need Maven installed to build Spark."
-    echo -e "Download Maven from https://maven.apache.org/"
+if ! which $MVN &>/dev/null; then
+    echo -e "Could not locate Maven command: '$MVN'."
+    echo -e "Specify the Maven command with the --mvn flag"
     exit -1;
 fi
 
@@ -119,7 +125,7 @@ VERSION=$(mvn help:evaluate -Dexpression=project.version 2>/dev/null | grep -v
 SPARK_HADOOP_VERSION=$(mvn help:evaluate -Dexpression=hadoop.version $@ 2>/dev/null\
     | grep -v "INFO"\
     | tail -n 1)
-SPARK_HIVE=$(mvn help:evaluate -Dexpression=project.activeProfiles -pl sql/hive $@ 2>/dev/null\
+SPARK_HIVE=$($MVN help:evaluate -Dexpression=project.activeProfiles -pl sql/hive $@ 2>/dev/null\
     | grep -v "INFO"\
     | fgrep --count "<id>hive</id>";\
     # Reset exit status to 0, otherwise the script stops here if the last grep finds nothing\
@@ -161,11 +167,11 @@ else
 fi
 
 # Build uber fat JAR
-cd $FWDIR
+cd $SPARK_HOME
 
 export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"
 
-BUILD_COMMAND="mvn clean package -DskipTests $@"
+BUILD_COMMAND="$MVN clean package -DskipTests $@"
 
 # Actually build the jar
 echo -e "\nBuilding with..."
@@ -177,41 +183,42 @@ ${BUILD_COMMAND}
 
 rm -rf "$DISTDIR"
 mkdir -p "$DISTDIR/lib"
 echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"
+echo "Build flags: $@" >> "$DISTDIR/RELEASE"
 
 # Copy jars
-cp "$FWDIR"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
-cp "$FWDIR"/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/"
+cp "$SPARK_HOME"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
+cp "$SPARK_HOME"/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/"
 # This will fail if the -Pyarn profile is not provided
 # In this case, silence the error and ignore the return code of this command
-cp "$FWDIR"/network/yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/" &> /dev/null || :
+cp "$SPARK_HOME"/network/yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/" &> /dev/null || :
 
 # Copy example sources (needed for python and SQL)
 mkdir -p "$DISTDIR/examples/src/main"
-cp -r "$FWDIR"/examples/src/main "$DISTDIR/examples/src/"
+cp -r "$SPARK_HOME"/examples/src/main "$DISTDIR/examples/src/"
 
 if [ "$SPARK_HIVE" == "1" ]; then
-  cp "$FWDIR"/lib_managed/jars/datanucleus*.jar "$DISTDIR/lib/"
+  cp "$SPARK_HOME"/lib_managed/jars/datanucleus*.jar "$DISTDIR/lib/"
 fi
 
 # Copy license and ASF files
-cp "$FWDIR/LICENSE" "$DISTDIR"
-cp "$FWDIR/NOTICE"
spark git commit: [SPARK-1143] Separate pool tests into their own suite.
Repository: spark
Updated Branches:
  refs/heads/master 1790b3869 -> b6aa55730

[SPARK-1143] Separate pool tests into their own suite.

The current TaskSchedulerImplSuite includes some tests that are actually for the TaskSchedulerImpl, but the remainder of the tests avoid using the TaskSchedulerImpl entirely, and actually test the pool and scheduling algorithm mechanisms. This commit separates the pool/scheduling algorithm tests into their own suite, and also simplifies those tests.

The pull request replaces #339.

Author: Kay Ousterhout <kayousterh...@gmail.com>

Closes #3967 from kayousterhout/SPARK-1143 and squashes the following commits:

8a898c4 [Kay Ousterhout] [SPARK-1143] Separate pool tests into their own suite.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b6aa5573
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b6aa5573
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b6aa5573

Branch: refs/heads/master
Commit: b6aa557300275b835cce7baa7bc8a80eb5425cbb
Parents: 1790b38
Author: Kay Ousterhout <kayousterh...@gmail.com>
Authored: Fri Jan 9 09:47:06 2015 -0800
Committer: Patrick Wendell <pwend...@gmail.com>
Committed: Fri Jan 9 09:47:06 2015 -0800

----------------------------------------------------------------------
 .../org/apache/spark/scheduler/PoolSuite.scala  | 183 +++++++++++++++
 .../scheduler/TaskSchedulerImplSuite.scala      | 230 ---------------
 2 files changed, 183 insertions(+), 230 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/b6aa5573/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala

diff --git a/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala
new file mode 100644
index 000..e8f461e
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.scheduler
+
+import java.util.Properties
+
+import org.scalatest.FunSuite
+
+import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext}
+
+/**
+ * Tests that pools and the associated scheduling algorithms for FIFO and fair scheduling work
+ * correctly.
+ */
+class PoolSuite extends FunSuite with LocalSparkContext {
+
+  def createTaskSetManager(stageId: Int, numTasks: Int, taskScheduler: TaskSchedulerImpl)
+    : TaskSetManager = {
+    val tasks = Array.tabulate[Task[_]](numTasks) { i =>
+      new FakeTask(i, Nil)
+    }
+    new TaskSetManager(taskScheduler, new TaskSet(tasks, stageId, 0, 0, null), 0)
+  }
+
+  def scheduleTaskAndVerifyId(taskId: Int, rootPool: Pool, expectedStageId: Int) {
+    val taskSetQueue = rootPool.getSortedTaskSetQueue
+    val nextTaskSetToSchedule =
+      taskSetQueue.find(t => (t.runningTasks + t.tasksSuccessful) < t.numTasks)
+    assert(nextTaskSetToSchedule.isDefined)
+    nextTaskSetToSchedule.get.addRunningTask(taskId)
+    assert(nextTaskSetToSchedule.get.stageId === expectedStageId)
+  }
+
+  test("FIFO Scheduler Test") {
+    sc = new SparkContext("local", "TaskSchedulerImplSuite")
+    val taskScheduler = new TaskSchedulerImpl(sc)
+
+    val rootPool = new Pool("", SchedulingMode.FIFO, 0, 0)
+    val schedulableBuilder = new FIFOSchedulableBuilder(rootPool)
+    schedulableBuilder.buildPools()
+
+    val taskSetManager0 = createTaskSetManager(0, 2, taskScheduler)
+    val taskSetManager1 = createTaskSetManager(1, 2, taskScheduler)
+    val taskSetManager2 = createTaskSetManager(2, 2, taskScheduler)
+    schedulableBuilder.addTaskSetManager(taskSetManager0, null)
+    schedulableBuilder.addTaskSetManager(taskSetManager1, null)
+    schedulableBuilder.addTaskSetManager(taskSetManager2, null)
+
+    scheduleTaskAndVerifyId(0, rootPool, 0)
+    scheduleTaskAndVerifyId(1, rootPool, 0)
+    scheduleTaskAndVerifyId(2, rootPool, 1)
+    scheduleTaskAndVerifyId(3, rootPool, 1)
+    scheduleTaskAndVerifyId(4, rootPool, 2)
+    scheduleTaskAndVerifyId(5, rootPool, 2)
+  }
+
+  /**
+   * This test creates
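The FIFO order the suite asserts can be modeled in a few lines (a toy sketch, not Spark's SchedulingAlgorithm API): lower priority first, ties broken by lower stage ID.

case class Entry(priority: Int, stageId: Int)
// Ordering by (priority, stageId) reproduces the FIFO comparator's behavior.
val fifo: Ordering[Entry] = Ordering.by(e => (e.priority, e.stageId))

Seq(Entry(0, 2), Entry(0, 0), Entry(0, 1)).sorted(fifo).map(_.stageId)
// => List(0, 1, 2), matching the scheduleTaskAndVerifyId assertions above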
spark git commit: SPARK-5136 [DOCS] Improve documentation around setting up Spark IntelliJ project
Repository: spark
Updated Branches:
  refs/heads/branch-1.2 71471bd79 -> 2f4e73d8f

SPARK-5136 [DOCS] Improve documentation around setting up Spark IntelliJ project

This PR simply points to the IntelliJ wiki page instead of also including IntelliJ notes in the docs. The intent however is to also update the wiki page with updated tips. This is the text I propose for the IntelliJ section on the wiki. I realize it omits some of the existing instructions on the wiki, about enabling Hive, but I think those are actually optional.

--

IntelliJ supports both Maven- and SBT-based projects. It is recommended, however, to import Spark as a Maven project. Choose "Import Project..." from the File menu, and select the `pom.xml` file in the Spark root directory.

It is fine to leave all settings at their default values in the Maven import wizard, with two caveats. First, it is usually useful to enable "Import Maven projects automatically", since changes to the project structure will automatically update the IntelliJ project. Second, note the step that prompts you to choose active Maven build profiles. As documented above, some build configurations require specific profiles to be enabled. The same profiles that are enabled with `-P[profile name]` above may be enabled on this screen. For example, if developing for Hadoop 2.4 with YARN support, enable profiles `yarn` and `hadoop-2.4`. These selections can be changed later by accessing the "Maven Projects" tool window from the View menu, and expanding the Profiles section.

"Rebuild Project" can fail the first time the project is compiled, because generated source files are not automatically generated. Try clicking the "Generate Sources and Update Folders For All Projects" button in the "Maven Projects" tool window to manually generate these sources.

Compilation may fail with an error like "scalac: bad option: -P:/home/jakub/.m2/repository/org/scalamacros/paradise_2.10.4/2.0.1/paradise_2.10.4-2.0.1.jar". If so, go to Preferences > Build, Execution, Deployment > Scala Compiler and clear the "Additional compiler options" field. It will work then, although the option will come back when the project reimports.

--

Author: Sean Owen <so...@cloudera.com>

Closes #3952 from srowen/SPARK-5136 and squashes the following commits:

f3baa66 [Sean Owen] Point to new IJ / Eclipse wiki link
016b7df [Sean Owen] Point to IntelliJ wiki page instead of also including IntelliJ notes in the docs

(cherry picked from commit 547df97715580f99ae573a49a86da12bf20cbc3d)
Signed-off-by: Patrick Wendell <pwend...@gmail.com>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2f4e73d8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2f4e73d8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2f4e73d8

Branch: refs/heads/branch-1.2
Commit: 2f4e73d8f55c9a59dbd28b95688c3a09b44773a9
Parents: 71471bd
Author: Sean Owen <so...@cloudera.com>
Authored: Fri Jan 9 09:35:46 2015 -0800
Committer: Patrick Wendell <pwend...@gmail.com>
Committed: Fri Jan 9 09:36:06 2015 -0800

----------------------------------------------------------------------
 docs/building-spark.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/2f4e73d8/docs/building-spark.md

diff --git a/docs/building-spark.md b/docs/building-spark.md
index 72a9bfd..a4b3472 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -161,9 +161,10 @@ Thus, the full flow for running continuous-compilation of the `core` submodule m
     $ mvn scala:cc
 ```
 
-# Using With IntelliJ IDEA
+# Building Spark with IntelliJ IDEA or Eclipse
 
-This setup works fine in IntelliJ IDEA 11.1.4. After opening the project via the pom.xml file in the project root folder, you only need to activate either the hadoop1 or hadoop2 profile in the Maven Properties popout. We have not tried Eclipse/Scala IDE with this.
+For help in setting up IntelliJ IDEA or Eclipse for Spark development, and troubleshooting, refer to the
+[wiki page for IDE setup](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-IDESetup).
 
 # Building Spark Debian Packages
spark git commit: [PySpark] Fix tests with Python 2.6 in 0.9 branch
Repository: spark
Updated Branches:
  refs/heads/branch-0.9 63c0ff992 -> 7d007d352

[PySpark] Fix tests with Python 2.6 in 0.9 branch

[PySpark] [SPARK-2954] [SPARK-2948] [SPARK-2910] [SPARK-2101] Python 2.6 Fixes

- Modify python/run-tests to test with Python 2.6
- Use unittest2 when running on Python 2.6.

Author: Josh Rosen <joshrosen@apache.org>
Closes #3668 from davies/port_2365 and squashes the following commits:

Author: cocoatomo <cocoatom...@gmail.com>

Closes #3968 from davies/fix_python_tests and squashes the following commits:

ac4a353 [cocoatomo] [PySpark] Fix tests with Python 2.6 in 1.0 branch

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7d007d35
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7d007d35
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7d007d35

Branch: refs/heads/branch-0.9
Commit: 7d007d352abc312231a775cb05183a1be85bb8e3
Parents: 63c0ff9
Author: cocoatomo <cocoatom...@gmail.com>
Authored: Fri Jan 9 09:49:15 2015 -0800
Committer: Andrew Or <and...@databricks.com>
Committed: Fri Jan 9 09:49:15 2015 -0800

----------------------------------------------------------------------
 python/pyspark/tests.py | 11 ++++++++++-
 python/run-tests        | 10 ++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/7d007d35/python/pyspark/tests.py

diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index 5b124d9..81ef80e 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -26,7 +26,16 @@ import shutil
 import sys
 from tempfile import NamedTemporaryFile
 import time
-import unittest
+
+if sys.version_info[:2] <= (2, 6):
+    try:
+        import unittest2 as unittest
+    except ImportError:
+        sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier')
+        sys.exit(1)
+else:
+    import unittest
+
 
 from pyspark.context import SparkContext
 from pyspark.files import SparkFiles

http://git-wip-us.apache.org/repos/asf/spark/blob/7d007d35/python/run-tests

diff --git a/python/run-tests b/python/run-tests
index a986ac9..6eeef4c 100755
--- a/python/run-tests
+++ b/python/run-tests
@@ -33,6 +33,16 @@ function run_test() {
     FAILED=$((PIPESTATUS[0]||$FAILED))
 }
 
+echo "Running PySpark tests. Output is in python/unit-tests.log."
+
+# Try to test with Python 2.6, since that's the minimum version that we support:
+if [ $(which python2.6) ]; then
+    export PYSPARK_PYTHON="python2.6"
+fi
+
+echo "Testing with Python version:"
+$PYSPARK_PYTHON --version
+
 run_test "pyspark/rdd.py"
 run_test "pyspark/context.py"
 run_test "pyspark/conf.py"
spark git commit: [SPARK-4006] Block Manager - Double Register Crash
Repository: spark
Updated Branches:
  refs/heads/branch-0.9 3fba7b7bc -> 6665df6b7

[SPARK-4006] Block Manager - Double Register Crash

This issue affects all versions since 0.7 up to (and including) 1.1.

In long-running contexts, we encountered the situation of a double register without a remove in between. The cause for that is unknown, and assumed to be a temporary network issue. However, since the second register is with a BlockManagerId on a different port, blockManagerInfo.contains() returns false, while blockManagerIdByExecutor returns Some. This inconsistency is caught in a conditional statement that does System.exit(1), which is a huge robustness issue for us.

The fix: simply remove the old id from both maps during register when this happens. We are mimicking the behavior of expireDeadHosts(), by doing local cleanup of the maps before trying to add new ones. Also, added some logging for register and unregister.

https://issues.apache.org/jira/browse/SPARK-4006

Author: Tal Sliwowicz <ta...@taboola.com>

Closes #2854 from tsliwowicz/branch-0.9.2-block-mgr-removal and squashes the following commits:

95ae4db [Tal Sliwowicz] [SPARK-4006] In long running contexts, we encountered the situation of double registe...
81d69f0 [Tal Sliwowicz] fixed comment
efd93f2 [Tal Sliwowicz] In long running contexts, we encountered the situation of double register without a remove in between. The cause for that is unknown, and assumed a temp network issue.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6665df6b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6665df6b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6665df6b

Branch: refs/heads/branch-0.9
Commit: 6665df6b79a707357bd2b5be77223665077f17a2
Parents: 3fba7b7
Author: Tal Sliwowicz <ta...@taboola.com>
Authored: Fri Jan 9 12:06:48 2015 -0800
Committer: Andrew Or <and...@databricks.com>
Committed: Fri Jan 9 12:06:48 2015 -0800

----------------------------------------------------------------------
 .../spark/storage/BlockManagerMasterActor.scala | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/6665df6b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala

diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala
index 2c1a4e2..8a82dc1 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala
@@ -160,6 +160,7 @@ class BlockManagerMasterActor(val isLocal: Boolean, conf: SparkConf) extends Act
         blockLocations.remove(locations)
       }
     }
+    logInfo(s"Removing block manager $blockManagerId")
   }
 
   private def expireDeadHosts() {
@@ -225,14 +226,18 @@ class BlockManagerMasterActor(val isLocal: Boolean, conf: SparkConf) extends Act
   private def register(id: BlockManagerId, maxMemSize: Long, slaveActor: ActorRef) {
     if (!blockManagerInfo.contains(id)) {
       blockManagerIdByExecutor.get(id.executorId) match {
-        case Some(manager) =>
-          // A block manager of the same executor already exists.
-          // This should never happen. Let's just quit.
-          logError("Got two different block manager registrations on " + id.executorId)
-          System.exit(1)
+        case Some(oldId) =>
+          // A block manager of the same executor already exists, so remove it (assumed dead)
+          logError("Got two different block manager registrations on same executor - "
+            + s" will replace old one $oldId with new one $id")
+          removeExecutor(id.executorId)
         case None =>
-          blockManagerIdByExecutor(id.executorId) = id
       }
+      logInfo("Registering block manager %s with %s RAM, %s".format(
+        id.hostPort, Utils.bytesToString(maxMemSize), id))
+
+      blockManagerIdByExecutor(id.executorId) = id
+
       blockManagerInfo(id) = new BlockManagerMasterActor.BlockManagerInfo(
         id, System.currentTimeMillis(), maxMemSize, slaveActor)
     }
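A minimal sketch of the register-with-cleanup pattern this commit adopts (illustrative names, not the BlockManagerMasterActor API): when an executor re-registers from a different address, evict the stale entry from both maps instead of exiting the process.

import scala.collection.mutable

case class ManagerId(executorId: String, hostPort: String)

class Registry {
  private val idByExecutor = mutable.Map[String, ManagerId]()
  private val info = mutable.Map[ManagerId, Long]()

  def register(id: ManagerId): Unit = {
    idByExecutor.get(id.executorId).foreach { oldId =>
      info.remove(oldId)  // same executor, different address: assume the old one is dead
    }
    idByExecutor(id.executorId) = id
    info(id) = System.currentTimeMillis()  // re-registration succeeds; never System.exit(1)
  }
}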
spark git commit: [SPARK-3619] Upgrade to Mesos 0.21 to work around MESOS-1688
Repository: spark
Updated Branches:
  refs/heads/master e9ca16ec9 -> 454fe129e

[SPARK-3619] Upgrade to Mesos 0.21 to work around MESOS-1688

- update version from 0.18.1 to 0.21.0
- I'm doing some tests in order to verify some Spark jobs work fine in a Mesos 0.21.0 environment.

Author: Jongyoul Lee <jongy...@gmail.com>

Closes #3934 from jongyoul/SPARK-3619 and squashes the following commits:

ab994fa [Jongyoul Lee] [SPARK-3619] Upgrade to Mesos 0.21 to work around MESOS-1688 - update version from 0.18.1 to 0.21.0

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/454fe129
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/454fe129
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/454fe129

Branch: refs/heads/master
Commit: 454fe129ee97b859bf079db8b9158e115a219ad5
Parents: e9ca16e
Author: Jongyoul Lee <jongy...@gmail.com>
Authored: Fri Jan 9 10:47:08 2015 -0800
Committer: Matei Zaharia <ma...@databricks.com>
Committed: Fri Jan 9 10:47:08 2015 -0800

----------------------------------------------------------------------
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/454fe129/pom.xml

diff --git a/pom.xml b/pom.xml
index 703e5c4..aadcdfd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -115,7 +115,7 @@
     <java.version>1.6</java.version>
     <sbt.project.name>spark</sbt.project.name>
     <scala.macros.version>2.0.1</scala.macros.version>
-    <mesos.version>0.18.1</mesos.version>
+    <mesos.version>0.21.0</mesos.version>
     <mesos.classifier>shaded-protobuf</mesos.classifier>
     <slf4j.version>1.7.5</slf4j.version>
     <log4j.version>1.2.17</log4j.version>
[1/2] spark git commit: [HOTFIX] Disable Spark UI in SparkSubmitSuite tests
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 b1c1d8612 -> b530bc92a

[HOTFIX] Disable Spark UI in SparkSubmitSuite tests

This should fix a major cause of build breaks when running many parallel tests.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b78422ae
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b78422ae
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b78422ae

Branch: refs/heads/branch-1.0
Commit: b78422ae170b89fa09e8910e247cbfecc23442f8
Parents: b1c1d86
Author: Josh Rosen <joshro...@databricks.com>
Authored: Fri Dec 12 12:38:37 2014 -0800
Committer: Andrew Or <and...@databricks.com>
Committed: Fri Jan 9 14:53:25 2015 -0800

----------------------------------------------------------------------
 core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala | 2 ++
 1 file changed, 2 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/b78422ae/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala

diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index fe817e0..27e1ad9 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -260,6 +260,7 @@ class SparkSubmitSuite extends FunSuite with ShouldMatchers with ResetSystemProp
       "--class", SimpleApplicationTest.getClass.getName.stripSuffix("$"),
       "--name", "testApp",
       "--master", "local",
+      "--conf", "spark.ui.enabled=false",
       unusedJar.toString)
     runSparkSubmit(args)
   }
@@ -274,6 +275,7 @@ class SparkSubmitSuite extends FunSuite with ShouldMatchers with ResetSystemProp
       "--name", "testApp",
       "--master", "local-cluster[2,1,512]",
       "--jars", jarsString,
+      "--conf", "spark.ui.enabled=false",
       unusedJar.toString)
     runSparkSubmit(args)
   }
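The same switch can be set programmatically when a test builds its own context; a sketch (the suite above passes it through spark-submit's --conf instead):

import org.apache.spark.{SparkConf, SparkContext}

val conf = new SparkConf()
  .setMaster("local")
  .setAppName("testApp")
  .set("spark.ui.enabled", "false")  // no UI, so no port contention across parallel tests
val sc = new SparkContext(conf)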
[2/2] spark git commit: [HOTFIX] Bind web UI to ephemeral port in DriverSuite
[HOTFIX] Bind web UI to ephemeral port in DriverSuite

The job launched by DriverSuite should bind the web UI to an ephemeral port, since it looks like port contention in this test has caused a large number of Jenkins failures when many builds are started simultaneously. Our tests already disable the web UI, but this doesn't affect subprocesses launched by our tests. In this case, I've opted to bind to an ephemeral port instead of disabling the UI because disabling features in this test may mask its ability to catch certain bugs.

See also: e24d3a9

Author: Josh Rosen <joshro...@databricks.com>

Closes #3873 from JoshRosen/driversuite-webui-port and squashes the following commits:

48cd05c [Josh Rosen] [HOTFIX] Bind web UI to ephemeral port in DriverSuite.

Conflicts:
	core/src/test/scala/org/apache/spark/DriverSuite.scala

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b530bc92
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b530bc92
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b530bc92

Branch: refs/heads/branch-1.0
Commit: b530bc92a523cd92e2ee14d4394fcb2a3f00afb9
Parents: b78422a
Author: Josh Rosen <joshro...@databricks.com>
Authored: Thu Jan 1 15:03:54 2015 -0800
Committer: Andrew Or <and...@databricks.com>
Committed: Fri Jan 9 14:54:53 2015 -0800

----------------------------------------------------------------------

----------------------------------------------------------------------
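A one-line sketch of the alternative the commit message describes: keep the UI on but let the OS pick a free port. This assumes the UI port setting accepts 0 for ephemeral binding, which is the behavior the hotfix relies on.

// Illustrative configuration only; 0 asks the OS for any free port.
val conf = new org.apache.spark.SparkConf().set("spark.ui.port", "0")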
spark git commit: [SPARK-4737] Task set manager properly handles serialization errors
Repository: spark
Updated Branches:
  refs/heads/master e96645206 -> e0f28e010

[SPARK-4737] Task set manager properly handles serialization errors

Dealing with [SPARK-4737], the handling of serialization errors should not be the DAGScheduler's responsibility. The task set manager now catches the error and aborts the stage.

If the TaskSetManager throws a TaskNotSerializableException, the TaskSchedulerImpl will return an empty list of task descriptions, because no tasks were started. The scheduler should abort the stage gracefully.

Note that I'm not too familiar with this part of the codebase and its place in the overall architecture of the Spark stack. If implementing it this way will have any adverse side effects please voice that loudly.

Author: mcheah <mch...@palantir.com>

Closes #3638 from mccheah/task-set-manager-properly-handle-ser-err and squashes the following commits:

1545984 [mcheah] Some more style fixes from Andrew Or.
5267929 [mcheah] Fixing style suggestions from Andrew Or.
dfa145b [mcheah] Fixing style from Josh Rosen's feedback
b2a430d [mcheah] Not returning empty seq when a task set cannot be serialized.
94844d7 [mcheah] Fixing compilation error, one brace too many
5f486f4 [mcheah] Adding license header for fake task class
bf5e706 [mcheah] Fixing indentation.
097e7a2 [mcheah] [SPARK-4737] Catching task serialization exception in TaskSetManager

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e0f28e01
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e0f28e01
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e0f28e01

Branch: refs/heads/master
Commit: e0f28e010cdd67a2a4c8aebd35323d69a3182ba8
Parents: e966452
Author: mcheah <mch...@palantir.com>
Authored: Fri Jan 9 14:16:20 2015 -0800
Committer: Andrew Or <and...@databricks.com>
Committed: Fri Jan 9 14:16:20 2015 -0800

----------------------------------------------------------------------
 .../spark/TaskNotSerializableException.scala    | 25 +
 .../apache/spark/scheduler/DAGScheduler.scala   | 20
 .../spark/scheduler/TaskSchedulerImpl.scala     | 54 ++--
 .../apache/spark/scheduler/TaskSetManager.scala | 18 +--
 .../org/apache/spark/SharedSparkContext.scala   |  2 +-
 .../scala/org/apache/spark/rdd/RDDSuite.scala   | 21
 .../scheduler/NotSerializableFakeTask.scala     | 40 +++
 .../scheduler/TaskSchedulerImplSuite.scala      | 30 +++
 .../spark/scheduler/TaskSetManagerSuite.scala   | 14 +
 9 files changed, 182 insertions(+), 42 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/e0f28e01/core/src/main/scala/org/apache/spark/TaskNotSerializableException.scala

diff --git a/core/src/main/scala/org/apache/spark/TaskNotSerializableException.scala b/core/src/main/scala/org/apache/spark/TaskNotSerializableException.scala
new file mode 100644
index 000..9df6106
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/TaskNotSerializableException.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark
+
+import org.apache.spark.annotation.DeveloperApi
+
+/**
+ * Exception thrown when a task cannot be serialized.
+ */
+private[spark] class TaskNotSerializableException(error: Throwable) extends Exception(error)

http://git-wip-us.apache.org/repos/asf/spark/blob/e0f28e01/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala

diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
index 259621d..61d09d7 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
@@ -866,26 +866,6 @@ class DAGScheduler(
     }
 
     if (tasks.size > 0) {
-      // Preemptively serialize a task to make sure it can be serialized. We are catching this
-      // exception here because it would be fairly
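The fail-fast idea can be sketched in isolation (hypothetical names; the real change lives in TaskSetManager): serialize one task up front and surface a typed exception so the scheduler can abort the stage gracefully instead of failing later on an executor.

import java.io.{ByteArrayOutputStream, NotSerializableException, ObjectOutputStream}

// Illustrative stand-in for the exception type added by this commit.
class TaskNotSerializableDemo(error: Throwable) extends Exception(error)

// Attempt Java serialization of a candidate task; rethrow with a type the
// caller can match on to abort the whole task set.
def checkSerializable(task: AnyRef): Unit =
  try {
    val out = new ObjectOutputStream(new ByteArrayOutputStream())
    out.writeObject(task)
    out.close()
  } catch {
    case e: NotSerializableException => throw new TaskNotSerializableDemo(e)
  }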
spark git commit: [DOC] Fixed Mesos version in doc from 0.18.1 to 0.21.0
Repository: spark
Updated Branches:
  refs/heads/master e0f28e010 -> ae628725a

[DOC] Fixed Mesos version in doc from 0.18.1 to 0.21.0

#3934 upgraded the Mesos version, so we should also fix the docs, right? This issue is really minor so I don't file it in JIRA.

Author: Kousuke Saruta <saru...@oss.nttdata.co.jp>

Closes #3982 from sarutak/fix-mesos-version and squashes the following commits:

9a86ee3 [Kousuke Saruta] Fixed mesos version from 0.18.1 to 0.21.0

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ae628725
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ae628725
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ae628725

Branch: refs/heads/master
Commit: ae628725abce9ffe34b9a7110d5ac51a076454aa
Parents: e0f28e0
Author: Kousuke Saruta <saru...@oss.nttdata.co.jp>
Authored: Fri Jan 9 14:40:45 2015 -0800
Committer: Andrew Or <and...@databricks.com>
Committed: Fri Jan 9 14:40:45 2015 -0800

----------------------------------------------------------------------
 docs/_config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/ae628725/docs/_config.yml

diff --git a/docs/_config.yml b/docs/_config.yml
index a96a76d..e2db274 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -17,6 +17,6 @@ SPARK_VERSION: 1.3.0-SNAPSHOT
 SPARK_VERSION_SHORT: 1.3.0
 SCALA_BINARY_VERSION: 2.10
 SCALA_VERSION: 2.10.4
-MESOS_VERSION: 0.18.1
+MESOS_VERSION: 0.21.0
 SPARK_ISSUE_TRACKER_URL: https://issues.apache.org/jira/browse/SPARK
 SPARK_GITHUB_URL: https://github.com/apache/spark
spark git commit: [Minor] Fix import order and other coding style
Repository: spark
Updated Branches:
  refs/heads/master ae628725a -> 4e1f12d99

[Minor] Fix import order and other coding style

Fixed import order and other coding style issues.

Author: bilna <bil...@am.amrita.edu>
Author: Bilna P <biln...@gmail.com>

Closes #3966 from Bilna/master and squashes the following commits:

5e76f04 [bilna] fix import order and other coding style
5718d66 [bilna] Merge remote-tracking branch 'upstream/master'
ae56514 [bilna] Merge remote-tracking branch 'upstream/master'
acea3a3 [bilna] Adding dependency with scope test
28681fa [bilna] Merge remote-tracking branch 'upstream/master'
fac3904 [bilna] Correction in indentation and coding style
ed9db4c [bilna] Merge remote-tracking branch 'upstream/master'
4b34ee7 [Bilna P] Update MQTTStreamSuite.scala
04503cf [bilna] Added embedded broker service for mqtt test
89d804e [bilna] Merge remote-tracking branch 'upstream/master'
fc8eb28 [bilna] Merge remote-tracking branch 'upstream/master'
4b58094 [Bilna P] Update MQTTStreamSuite.scala
b1ac4ad [bilna] Added BeforeAndAfter
5f6bfd2 [bilna] Added BeforeAndAfter
e8b6623 [Bilna P] Update MQTTStreamSuite.scala
5ca6691 [Bilna P] Update MQTTStreamSuite.scala
8616495 [bilna] [SPARK-4631] unit test for MQTT

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4e1f12d9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4e1f12d9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4e1f12d9

Branch: refs/heads/master
Commit: 4e1f12d997426560226648d62ee17c90352613e7
Parents: ae62872
Author: bilna <bil...@am.amrita.edu>
Authored: Fri Jan 9 14:45:28 2015 -0800
Committer: Andrew Or <and...@databricks.com>
Committed: Fri Jan 9 14:45:28 2015 -0800

--
 .../spark/streaming/mqtt/MQTTStreamSuite.scala | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/4e1f12d9/external/mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
--
diff --git a/external/mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala b/external/mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
index 98fe6cb..39eb8b1 100644
--- a/external/mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
+++ b/external/mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
@@ -19,16 +19,19 @@ package org.apache.spark.streaming.mqtt

 import java.net.{URI, ServerSocket}

+import scala.concurrent.duration._
+
 import org.apache.activemq.broker.{TransportConnector, BrokerService}
-import org.apache.spark.util.Utils
+import org.eclipse.paho.client.mqttv3._
+import org.eclipse.paho.client.mqttv3.persist.MqttDefaultFilePersistence
+
 import org.scalatest.{BeforeAndAfter, FunSuite}
 import org.scalatest.concurrent.Eventually
-import scala.concurrent.duration._
+
 import org.apache.spark.streaming.{Milliseconds, StreamingContext}
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.streaming.dstream.ReceiverInputDStream
-import org.eclipse.paho.client.mqttv3._
-import org.eclipse.paho.client.mqttv3.persist.MqttDefaultFilePersistence
+import org.apache.spark.util.Utils

 class MQTTStreamSuite extends FunSuite with Eventually with BeforeAndAfter {

@@ -38,8 +41,9 @@ class MQTTStreamSuite extends FunSuite with Eventually with BeforeAndAfter {
   private val freePort = findFreePort()
   private val brokerUri = "//localhost:" + freePort
   private val topic = "def"
-  private var ssc: StreamingContext = _
   private val persistenceDir = Utils.createTempDir()
+
+  private var ssc: StreamingContext = _
   private var broker: BrokerService = _
   private var connector: TransportConnector = _

@@ -115,8 +119,9 @@ class MQTTStreamSuite extends FunSuite with Eventually with BeforeAndAfter {
       val message: MqttMessage = new MqttMessage(data.getBytes("utf-8"))
       message.setQos(1)
       message.setRetained(true)
-      for (i <- 0 to 100)
+      for (i <- 0 to 100) {
         msgTopic.publish(message)
+      }
       }
     } finally {
       client.disconnect()
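The reshuffle above follows the import-grouping convention used across the Spark codebase: java/javax first, then scala, then third-party libraries, with org.apache.spark imports last and a blank line between groups. A minimal sketch of that convention (the imports are illustrative, not an enforced checkstyle rule):

    // 1. java / javax packages
    import java.net.ServerSocket

    // 2. scala packages
    import scala.concurrent.duration._

    // 3. third-party libraries
    import org.scalatest.FunSuite

    // 4. org.apache.spark, always last
    import org.apache.spark.util.Utils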
spark git commit: [SPARK-5015] [mllib] Random seed for GMM + make test suite deterministic
Repository: spark
Updated Branches:
  refs/heads/master 454fe129e -> 7e8e62aec

[SPARK-5015] [mllib] Random seed for GMM + make test suite deterministic

Issues:
* From JIRA: GaussianMixtureEM uses randomness but does not take a random seed. It should take one as a parameter.
* This also makes the test suite flaky since initialization can fail due to stochasticity.

Fix:
* Add random seed
* Use it in test suite

CC: mengxr tgaloppo

Author: Joseph K. Bradley <jos...@databricks.com>

Closes #3981 from jkbradley/gmm-seed and squashes the following commits:

f0df4fd [Joseph K. Bradley] Added seed parameter to GMM. Updated test suite to use seed to prevent flakiness

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7e8e62ae
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7e8e62ae
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7e8e62ae

Branch: refs/heads/master
Commit: 7e8e62aec11c43c983055adc475b96006412199a
Parents: 454fe12
Author: Joseph K. Bradley <jos...@databricks.com>
Authored: Fri Jan 9 13:00:15 2015 -0800
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Fri Jan 9 13:00:15 2015 -0800

--
 .../mllib/clustering/GaussianMixtureEM.scala    | 26 ++++++++++++------
 .../GMMExpectationMaximizationSuite.scala       | 14 +++++++---
 2 files changed, 27 insertions(+), 13 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/7e8e62ae/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureEM.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureEM.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureEM.scala
index 3a6c0e6..b3c5631 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureEM.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureEM.scala
@@ -24,6 +24,7 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.linalg.{Matrices, Vector, Vectors, DenseVector, DenseMatrix, BLAS}
 import org.apache.spark.mllib.stat.impl.MultivariateGaussian
 import org.apache.spark.mllib.util.MLUtils
+import org.apache.spark.util.Utils

 /**
  * This class performs expectation maximization for multivariate Gaussian
@@ -45,10 +46,11 @@ import org.apache.spark.mllib.util.MLUtils
 class GaussianMixtureEM private (
     private var k: Int,
     private var convergenceTol: Double,
-    private var maxIterations: Int) extends Serializable {
+    private var maxIterations: Int,
+    private var seed: Long) extends Serializable {

   /** A default instance, 2 Gaussians, 100 iterations, 0.01 log-likelihood threshold */
-  def this() = this(2, 0.01, 100)
+  def this() = this(2, 0.01, 100, Utils.random.nextLong())

   // number of samples per cluster to use when initializing Gaussians
   private val nSamples = 5
@@ -100,11 +102,21 @@ class GaussianMixtureEM private (
     this
   }

-  /** Return the largest change in log-likelihood at which convergence is
-   *  considered to have occurred.
+  /**
+   * Return the largest change in log-likelihood at which convergence is
+   * considered to have occurred.
    */
   def getConvergenceTol: Double = convergenceTol
-
+
+  /** Set the random seed */
+  def setSeed(seed: Long): this.type = {
+    this.seed = seed
+    this
+  }
+
+  /** Return the random seed */
+  def getSeed: Long = seed
+
   /** Perform expectation maximization */
   def run(data: RDD[Vector]): GaussianMixtureModel = {
     val sc = data.sparkContext
@@ -113,7 +125,7 @@ class GaussianMixtureEM private (
     val breezeData = data.map(u => u.toBreeze.toDenseVector).cache()

     // Get length of the input vectors
-    val d = breezeData.first.length
+    val d = breezeData.first().length

     // Determine initial weights and corresponding Gaussians.
     // If the user supplied an initial GMM, we use those values, otherwise
@@ -126,7 +138,7 @@ class GaussianMixtureEM private (
       })

       case None => {
-        val samples = breezeData.takeSample(true, k * nSamples, scala.util.Random.nextInt)
+        val samples = breezeData.takeSample(withReplacement = true, k * nSamples, seed)
         (Array.fill(k)(1.0 / k), Array.tabulate(k) { i =>
           val slice = samples.view(i * nSamples, (i + 1) * nSamples)
           new MultivariateGaussian(vectorMean(slice), initCovariance(slice))

http://git-wip-us.apache.org/repos/asf/spark/blob/7e8e62ae/mllib/src/test/scala/org/apache/spark/mllib/clustering/GMMExpectationMaximizationSuite.scala
--
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/GMMExpectationMaximizationSuite.scala
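With the new setter in place, a caller can pin the seed so that the sampled initialization, and therefore the fitted model, is reproducible across runs. A hedged usage sketch (it assumes an existing SparkContext `sc` and the class's pre-existing setK setter; the data is a toy placeholder):

    import org.apache.spark.mllib.clustering.GaussianMixtureEM
    import org.apache.spark.mllib.linalg.Vectors

    // Two well-separated toy clusters in 2-D.
    val data = sc.parallelize(Seq(
      Vectors.dense(0.0, 0.1), Vectors.dense(0.2, 0.0),
      Vectors.dense(9.9, 10.0), Vectors.dense(10.1, 9.8)))

    // Fixing the seed makes takeSample, and hence the initial Gaussians,
    // deterministic -- exactly what the updated test suite relies on.
    val model = new GaussianMixtureEM()
      .setK(2)
      .setSeed(42L)
      .run(data)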
spark git commit: [SPARK-1953][YARN]yarn client mode Application Master memory size is same as driver memory...
Repository: spark
Updated Branches:
  refs/heads/master 7e8e62aec -> e96645206

[SPARK-1953][YARN]yarn client mode Application Master memory size is same as driver memory size

Ways to set the Application Master's memory in yarn-client mode:
1. spark.yarn.am.memory in SparkConf or System Properties
2. default value 512m

Note: this argument is only available in yarn-client mode.

Author: WangTaoTheTonic <barneystin...@aliyun.com>

Closes #3607 from WangTaoTheTonic/SPARK4181 and squashes the following commits:

d5ceb1b [WangTaoTheTonic] spark.driver.memory is used in both modes
6c1b264 [WangTaoTheTonic] rebase
b8410c0 [WangTaoTheTonic] minor optimization
ddcd592 [WangTaoTheTonic] fix the bug produced in rebase and some improvements
3bf70cc [WangTaoTheTonic] rebase and give proper hint
987b99d [WangTaoTheTonic] disable --driver-memory in client mode
2b27928 [WangTaoTheTonic] inaccurate description
b7acbb2 [WangTaoTheTonic] incorrect method invoked
2557c5e [WangTaoTheTonic] missing a single blank
42075b0 [WangTaoTheTonic] arrange the args and warn logging
69c7dba [WangTaoTheTonic] rebase
1960d16 [WangTaoTheTonic] fix wrong comment
7fa9e2e [WangTaoTheTonic] log a warning
f6bee0e [WangTaoTheTonic] docs issue
d619996 [WangTaoTheTonic] Merge branch 'master' into SPARK4181
b09c309 [WangTaoTheTonic] use code format
ab16bb5 [WangTaoTheTonic] fix bug and add comments
44e48c2 [WangTaoTheTonic] minor fix
6fd13e1 [WangTaoTheTonic] add overhead mem and remove some configs
0566bb8 [WangTaoTheTonic] yarn client mode Application Master memory size is same as driver memory size

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e9664520
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e9664520
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e9664520

Branch: refs/heads/master
Commit: e96645206006a009e5c1a23bbd177dcaf3ef9b83
Parents: 7e8e62a
Author: WangTaoTheTonic <barneystin...@aliyun.com>
Authored: Fri Jan 9 13:20:32 2015 -0800
Committer: Andrew Or <and...@databricks.com>
Committed: Fri Jan 9 13:23:13 2015 -0800

--
 .../spark/deploy/SparkSubmitArguments.scala     |  3 +-
 docs/running-on-yarn.md                         | 19 ++++++++++++--
 .../org/apache/spark/deploy/yarn/Client.scala   |  2 +-
 .../spark/deploy/yarn/ClientArguments.scala     | 37 ++++++++++++++++-----
 .../cluster/YarnClientSchedulerBackend.scala    |  2 --
 5 files changed, 48 insertions(+), 15 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/e9664520/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index 1faabe9..f14ef4d 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -405,7 +405,8 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St
        |  --queue QUEUE_NAME          The YARN queue to submit to (Default: "default").
        |  --num-executors NUM         Number of executors to launch (Default: 2).
        |  --archives ARCHIVES         Comma separated list of archives to be extracted into the
-       |                              working directory of each executor.""".stripMargin
+       |                              working directory of each executor.
+      """.stripMargin
     )
     SparkSubmit.exitFn()
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/e9664520/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 183698f..4f27309 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -22,6 +22,14 @@ Most of the configs are the same for Spark on YARN as for other deployment modes

 <table class="table">
 <tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
 <tr>
+  <td><code>spark.yarn.am.memory</code></td>
+  <td>512m</td>
+  <td>
+    Amount of memory to use for the YARN Application Master in client mode, in the same format as JVM memory strings (e.g. <code>512m</code>, <code>2g</code>).
+    In cluster mode, use <code>spark.driver.memory</code> instead.
+  </td>
+</tr>
+<tr>
   <td><code>spark.yarn.am.waitTime</code></td>
   <td>10</td>
   <td>
@@ -90,7 +98,14 @@ Most of the configs are the same for Spark on YARN as for other deployment modes
   <td><code>spark.yarn.driver.memoryOverhead</code></td>
   <td>driverMemory * 0.07, with minimum of 384</td>
   <td>
-    The amount of off heap memory (in megabytes) to be allocated per driver. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc.
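For completeness, the new property is supplied like any other Spark configuration entry; a sketch for yarn-client mode, the only mode where it applies per the docs above (the app name and memory size are arbitrary placeholders):

    import org.apache.spark.{SparkConf, SparkContext}

    // In yarn-client mode the driver runs locally, so the AM gets its own,
    // separately sized container; spark.yarn.am.memory controls that size.
    val conf = new SparkConf()
      .setMaster("yarn-client")
      .setAppName("am-memory-example")
      .set("spark.yarn.am.memory", "1g") // ignored in cluster mode; use spark.driver.memory there
    val sc = new SparkContext(conf)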
spark git commit: [SPARK-5169][YARN]fetch the correct max attempts
Repository: spark
Updated Branches:
  refs/heads/master 167a5ab0b -> f3da4bd72

[SPARK-5169][YARN]fetch the correct max attempts

Sorry for fetching the wrong max attempts in this commit https://github.com/apache/spark/commit/8fdd48959c93b9cf809f03549e2ae6c4687d1fcd. We need to fix it now.

tgravescs If we set a spark.yarn.maxAppAttempts which is larger than `yarn.resourcemanager.am.max-attempts` on the YARN side, it will be overridden as described here:

"The maximum number of application attempts. It's a global setting for all application masters. Each application master can specify its individual maximum number of application attempts via the API, but the individual number cannot be more than the global upper bound. If it is, the resourcemanager will override it. The default number is set to 2, to allow at least one retry for AM."

http://hadoop.apache.org/docs/r2.6.0/hadoop-yarn/hadoop-yarn-common/yarn-default.xml

Author: WangTaoTheTonic <barneystin...@aliyun.com>

Closes #3942 from WangTaoTheTonic/HOTFIX and squashes the following commits:

9ac16ce [WangTaoTheTonic] fetch the correct max attempts

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f3da4bd7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f3da4bd7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f3da4bd7

Branch: refs/heads/master
Commit: f3da4bd7289d493014ad3c5176ada60794dfcfe0
Parents: 167a5ab
Author: WangTaoTheTonic <barneystin...@aliyun.com>
Authored: Fri Jan 9 08:10:09 2015 -0600
Committer: Thomas Graves <tgra...@apache.org>
Committed: Fri Jan 9 08:10:09 2015 -0600

--
 .../org/apache/spark/deploy/yarn/YarnRMClient.scala | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f3da4bd7/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala
--
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala
index e183efc..b45e599 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala
@@ -121,9 +121,15 @@ private[spark] class YarnRMClient(args: ApplicationMasterArguments) extends Logg

   /** Returns the maximum number of attempts to register the AM. */
   def getMaxRegAttempts(sparkConf: SparkConf, yarnConf: YarnConfiguration): Int = {
-    sparkConf.getOption("spark.yarn.maxAppAttempts").map(_.toInt).getOrElse(
-      yarnConf.getInt(
-        YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS))
+    val sparkMaxAttempts = sparkConf.getOption("spark.yarn.maxAppAttempts").map(_.toInt)
+    val yarnMaxAttempts = yarnConf.getInt(
+      YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS)
+    val retval: Int = sparkMaxAttempts match {
+      case Some(x) => if (x <= yarnMaxAttempts) x else yarnMaxAttempts
+      case None => yarnMaxAttempts
+    }
+
+    retval
   }
 }
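The match expression above amounts to capping the Spark-side setting at YARN's global bound. An equivalent, more compact restatement (a sketch, not the committed code):

    // Some(spark setting) is clamped to YARN's global maximum;
    // None falls through to the YARN value unchanged.
    def effectiveMaxAttempts(sparkMaxAttempts: Option[Int], yarnMaxAttempts: Int): Int =
      sparkMaxAttempts.fold(yarnMaxAttempts)(x => math.min(x, yarnMaxAttempts))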