spark git commit: Style fix for the previous 3 JDBC filter push down commits.
Repository: spark Updated Branches: refs/heads/master 2aad2d372 -> 554d840a9 Style fix for the previous 3 JDBC filter push down commits. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/554d840a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/554d840a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/554d840a Branch: refs/heads/master Commit: 554d840a9ade79722c96972257435a05e2aa9d88 Parents: 2aad2d3 Author: Reynold Xin Authored: Tue Dec 15 22:32:51 2015 -0800 Committer: Reynold Xin Committed: Tue Dec 15 22:32:51 2015 -0800 -- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 17 - 1 file changed, 8 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/554d840a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index d6aeb52..2b91f62 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -176,15 +176,14 @@ class JDBCSuite extends SparkFunSuite with BeforeAndAfter with SharedSQLContext } test("SELECT * WHERE (simple predicates)") { -assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID < 1")).collect().size === 0) -assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID != 2")).collect().size === 2) -assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID = 1")).collect().size === 1) -assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME = 'fred'")).collect().size === 1) -assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 'fred'")).collect().size === 2) -assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME != 'fred'")).collect().size === 2) -assert(stripSparkFilter(sql("SELECT * FROM nulltypes WHERE A IS NULL")).collect().size === 1) -assert(stripSparkFilter( - sql("SELECT * FROM nulltypes WHERE A IS NOT NULL")).collect().size === 0) +assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID < 1")).collect().size == 0) +assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID != 2")).collect().size == 2) +assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID = 1")).collect().size == 1) +assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME = 'fred'")).collect().size == 1) +assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 'fred'")).collect().size == 2) +assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME != 'fred'")).collect().size == 2) +assert(stripSparkFilter(sql("SELECT * FROM nulltypes WHERE A IS NULL")).collect().size == 1) +assert(stripSparkFilter(sql("SELECT * FROM nulltypes WHERE A IS NOT NULL")).collect().size == 0) } test("SELECT * WHERE (quoted strings)") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
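For readers following these pushdown tests, here is a minimal sketch of what the touched assertions verify. It assumes a suite that mixes in SharedSQLContext, as JDBCSuite does, and that `stripSparkFilter` is the test helper that removes the Spark-side Filter operator from the physical plan; it is an illustration, not part of the patch.

    // Hedged sketch of the test idiom above, not code from the patch.
    test("pushdown sanity check (sketch)") {
      // If "THEID != 2" is pushed to the JDBC source, the database itself returns
      // only the matching rows, so the expected count still holds even after the
      // Spark-side Filter node has been stripped from the plan.
      val df = stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID != 2"))
      assert(df.collect().size == 2) // '==' rather than '===' keeps the line within the style limit
    }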
spark git commit: [SPARK-12315][SQL] isnotnull operator not pushed down for JDBC datasource.
Repository: spark Updated Branches: refs/heads/master 7f443a687 -> 2aad2d372 [SPARK-12315][SQL] isnotnull operator not pushed down for JDBC datasource. https://issues.apache.org/jira/browse/SPARK-12315 `IsNotNull` filter is not being pushed down for JDBC datasource. It looks it is SQL standard according to [SQL-92](http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt), SQL:1999, [SQL:2003](http://www.wiscorp.com/sql_2003_standard.zip) and [SQL:201x](http://www.wiscorp.com/sql20nn.zip) and I believe most databases support this. In this PR, I simply added the case for `IsNotNull` filter to produce a proper filter string. Author: hyukjinkwon This patch had conflicts when merged, resolved by Committer: Reynold Xin Closes #10287 from HyukjinKwon/SPARK-12315. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2aad2d37 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2aad2d37 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2aad2d37 Branch: refs/heads/master Commit: 2aad2d372469aaf2773876cae98ef002fef03aa3 Parents: 7f443a6 Author: hyukjinkwon Authored: Tue Dec 15 22:30:35 2015 -0800 Committer: Reynold Xin Committed: Tue Dec 15 22:30:35 2015 -0800 -- .../org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala | 1 + sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 2 ++ 2 files changed, 3 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2aad2d37/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index 3271b46..2d38562 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -287,6 +287,7 @@ private[sql] class JDBCRDD( case LessThanOrEqual(attr, value) => s"$attr <= ${compileValue(value)}" case GreaterThanOrEqual(attr, value) => s"$attr >= ${compileValue(value)}" case IsNull(attr) => s"$attr IS NULL" +case IsNotNull(attr) => s"$attr IS NOT NULL" case _ => null } http://git-wip-us.apache.org/repos/asf/spark/blob/2aad2d37/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 0305667..d6aeb52 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -183,6 +183,8 @@ class JDBCSuite extends SparkFunSuite with BeforeAndAfter with SharedSQLContext assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 'fred'")).collect().size === 2) assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME != 'fred'")).collect().size === 2) assert(stripSparkFilter(sql("SELECT * FROM nulltypes WHERE A IS NULL")).collect().size === 1) +assert(stripSparkFilter( + sql("SELECT * FROM nulltypes WHERE A IS NOT NULL")).collect().size === 0) } test("SELECT * WHERE (quoted strings)") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
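The added case extends a simple mapping from public source filters to SQL fragments. A hedged, simplified sketch of that idea (the real compileFilter also runs values through compileValue and falls back to null for unsupported filters):

    import org.apache.spark.sql.sources.{Filter, IsNotNull, IsNull}

    // Hedged sketch, not the actual JDBCRDD code: turn null-test filters into
    // SQL fragments that the remote database can evaluate itself.
    def compileNullFilter(f: Filter): Option[String] = f match {
      case IsNull(attr)    => Some(s"$attr IS NULL")
      case IsNotNull(attr) => Some(s"$attr IS NOT NULL")
      case _               => None
    }
    // compileNullFilter(IsNotNull("A")) returns Some("A IS NOT NULL")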
spark git commit: [SPARK-12314][SQL] isnull operator not pushed down for JDBC datasource.
Repository: spark Updated Branches: refs/heads/master 0f6936b5f -> 7f443a687 [SPARK-12314][SQL] isnull operator not pushed down for JDBC datasource. https://issues.apache.org/jira/browse/SPARK-12314 `IsNull` filter is not being pushed down for JDBC datasource. It looks it is SQL standard according to [SQL-92](http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt), SQL:1999, [SQL:2003](http://www.wiscorp.com/sql_2003_standard.zip) and [SQL:201x](http://www.wiscorp.com/sql20nn.zip) and I believe most databases support this. In this PR, I simply added the case for `IsNull` filter to produce a proper filter string. Author: hyukjinkwon This patch had conflicts when merged, resolved by Committer: Reynold Xin Closes #10286 from HyukjinKwon/SPARK-12314. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7f443a68 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7f443a68 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7f443a68 Branch: refs/heads/master Commit: 7f443a6879fa33ca8adb682bd85df2d56fb5fcda Parents: 0f6936b Author: hyukjinkwon Authored: Tue Dec 15 22:25:08 2015 -0800 Committer: Reynold Xin Committed: Tue Dec 15 22:25:08 2015 -0800 -- .../org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala | 1 + sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 1 + 2 files changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7f443a68/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index c18a2d2..3271b46 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -286,6 +286,7 @@ private[sql] class JDBCRDD( case GreaterThan(attr, value) => s"$attr > ${compileValue(value)}" case LessThanOrEqual(attr, value) => s"$attr <= ${compileValue(value)}" case GreaterThanOrEqual(attr, value) => s"$attr >= ${compileValue(value)}" +case IsNull(attr) => s"$attr IS NULL" case _ => null } http://git-wip-us.apache.org/repos/asf/spark/blob/7f443a68/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index aca1443..0305667 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -182,6 +182,7 @@ class JDBCSuite extends SparkFunSuite with BeforeAndAfter with SharedSQLContext assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME = 'fred'")).collect().size === 1) assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 'fred'")).collect().size === 2) assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME != 'fred'")).collect().size === 2) +assert(stripSparkFilter(sql("SELECT * FROM nulltypes WHERE A IS NULL")).collect().size === 1) } test("SELECT * WHERE (quoted strings)") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
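From the user's side, the change means a null test against a JDBC-backed DataFrame can be evaluated by the database rather than in Spark. A hedged sketch of that effect (the connection URL, table, and column names are illustrative, and an existing `sqlContext` is assumed):

    // Hedged sketch of the end-user effect; names are illustrative only.
    val df = sqlContext.read
      .format("jdbc")
      .option("url", "jdbc:h2:mem:testdb") // assumed connection URL
      .option("dbtable", "NULLTYPES")
      .load()
    // With the new case, the predicate can be pushed down as "WHERE A IS NULL"
    // instead of fetching every row and filtering on the Spark side.
    val nullsOnly = df.filter(df("A").isNull)
    nullsOnly.count()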
spark git commit: [SPARK-12249][SQL] JDBC non-equality comparison operator not pushed down.
Repository: spark Updated Branches: refs/heads/master d52bf47e1 -> 0f6936b5f [SPARK-12249][SQL] JDBC non-equality comparison operator not pushed down. https://issues.apache.org/jira/browse/SPARK-12249 Currently `!=` operator is not pushed down correctly. I simply added a case for this. Author: hyukjinkwon Closes #10233 from HyukjinKwon/SPARK-12249. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0f6936b5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0f6936b5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0f6936b5 Branch: refs/heads/master Commit: 0f6936b5f1c9b0be1c33b98ffb62a72ae0c3e2a8 Parents: d52bf47 Author: hyukjinkwon Authored: Tue Dec 15 22:22:49 2015 -0800 Committer: Reynold Xin Committed: Tue Dec 15 22:22:49 2015 -0800 -- .../org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala | 1 + sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 2 ++ 2 files changed, 3 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0f6936b5/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index 1c348ed..c18a2d2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -281,6 +281,7 @@ private[sql] class JDBCRDD( */ private def compileFilter(f: Filter): String = f match { case EqualTo(attr, value) => s"$attr = ${compileValue(value)}" +case Not(EqualTo(attr, value)) => s"$attr != ${compileValue(value)}" case LessThan(attr, value) => s"$attr < ${compileValue(value)}" case GreaterThan(attr, value) => s"$attr > ${compileValue(value)}" case LessThanOrEqual(attr, value) => s"$attr <= ${compileValue(value)}" http://git-wip-us.apache.org/repos/asf/spark/blob/0f6936b5/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index a360947..aca1443 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -177,9 +177,11 @@ class JDBCSuite extends SparkFunSuite with BeforeAndAfter with SharedSQLContext test("SELECT * WHERE (simple predicates)") { assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID < 1")).collect().size === 0) +assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID != 2")).collect().size === 2) assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID = 1")).collect().size === 1) assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME = 'fred'")).collect().size === 1) assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 'fred'")).collect().size === 2) +assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME != 'fred'")).collect().size === 2) } test("SELECT * WHERE (quoted strings)") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
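For context, Catalyst hands `col != value` to a data source as a `Not(EqualTo(...))` filter, which is why the new case matches on that shape. A hedged sketch of the mapping in isolation:

    import org.apache.spark.sql.sources.{EqualTo, Filter, Not}

    // Hedged sketch, not the actual JDBCRDD code; value handling is simplified.
    def compileNotEqual(f: Filter): Option[String] = f match {
      case Not(EqualTo(attr, value)) => Some(s"$attr != $value")
      case _                         => None
    }
    // compileNotEqual(Not(EqualTo("THEID", 2))) returns Some("THEID != 2")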
spark git commit: [SPARK-12304][STREAMING] Make Spark Streaming web UI display more friendly Receiver graphs
Repository: spark Updated Branches: refs/heads/master ca0690b5e -> d52bf47e1 [SPARK-12304][STREAMING] Make Spark Streaming web UI display more friendly Receiver graphs Currently, the Spark Streaming web UI uses the same maxY when displaying 'Input Rate Times & Histograms' and 'Per-Receiver Times & Histograms'. This may lead to somewhat unfriendly graphs: once we have tens of Receivers or more, every 'Per-Receiver Times' line almost hits the ground. This issue proposes to calculate a new maxY against the original one, which is shared among all the 'Per-Receiver Times & Histograms' graphs. Before: ![before-5](https://cloud.githubusercontent.com/assets/15843379/11761362/d790c356-a0fa-11e5-860e-4b834603de1d.png) After: ![after-5](https://cloud.githubusercontent.com/assets/15843379/11761361/cfabf692-a0fa-11e5-97d0-4ad124aaca2a.png) Author: proflin Closes #10318 from proflin/SPARK-12304. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d52bf47e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d52bf47e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d52bf47e Branch: refs/heads/master Commit: d52bf47e13e0186590437f71040100d2f6f11da9 Parents: ca0690b Author: proflin Authored: Tue Dec 15 20:22:56 2015 -0800 Committer: Shixiong Zhu Committed: Tue Dec 15 20:22:56 2015 -0800 -- .../scala/org/apache/spark/streaming/ui/StreamingPage.scala | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d52bf47e/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala index 88a4483..b3692c3 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala @@ -392,9 +392,15 @@ private[ui] class StreamingPage(parent: StreamingTab) maxX: Long, minY: Double, maxY: Double): Seq[Node] = { +val maxYCalculated = listener.receivedEventRateWithBatchTime.values + .flatMap { case streamAndRates => streamAndRates.map { case (_, eventRate) => eventRate } } + .reduceOption[Double](math.max) + .map(_.ceil.toLong) + .getOrElse(0L) + val content = listener.receivedEventRateWithBatchTime.toList.sortBy(_._1).map { case (streamId, eventRates) => -generateInputDStreamRow(jsCollector, streamId, eventRates, minX, maxX, minY, maxY) +generateInputDStreamRow(jsCollector, streamId, eventRates, minX, maxX, minY, maxYCalculated) }.foldLeft[Seq[Node]](Nil)(_ ++ _) // scalastyle:off - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
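The new scaling logic boils down to a single traversal of the per-receiver rates. A small self-contained sketch follows; the map shape, streamId -> (batchTime, eventsPerSecond), mirrors the listener's data, but the numbers are made up:

    // Hedged sketch of the maxY computation added above; sample data only.
    val receivedEventRateWithBatchTime: Map[Int, Seq[(Long, Double)]] = Map(
      0 -> Seq((1000L, 12.0), (2000L, 9.0)), // receiver 0
      1 -> Seq((1000L, 3.5), (2000L, 4.2)))  // receiver 1
    val maxYCalculated = receivedEventRateWithBatchTime.values
      .flatMap(_.map { case (_, eventRate) => eventRate })
      .reduceOption[Double](math.max)
      .map(_.ceil.toLong)
      .getOrElse(0L)
    // maxYCalculated == 12: every per-receiver graph is scaled to the largest
    // single-receiver rate instead of the much larger aggregate input rate.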
spark git commit: [SPARK-4117][YARN] Spark on Yarn handle AM being told command from RM
Repository: spark Updated Branches: refs/heads/master a89e8b612 -> ca0690b5e [SPARK-4117][YARN] Spark on Yarn handle AM being told command from RM Spark on Yarn handle AM being told command from RM When RM throws ApplicationAttemptNotFoundException for allocate invocation, making the ApplicationMaster to finish immediately without any retries. Author: Devaraj K Closes #10129 from devaraj-kavali/SPARK-4117. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca0690b5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca0690b5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca0690b5 Branch: refs/heads/master Commit: ca0690b5ef10b14ce57a0c30d5308eb02f163f39 Parents: a89e8b6 Author: Devaraj K Authored: Tue Dec 15 18:30:59 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:30:59 2015 -0800 -- .../org/apache/spark/deploy/yarn/ApplicationMaster.scala| 9 - 1 file changed, 8 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ca0690b5/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 1970f7d..fc742df 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -376,7 +376,14 @@ private[spark] class ApplicationMaster( case i: InterruptedException => case e: Throwable => { failureCount += 1 - if (!NonFatal(e) || failureCount >= reporterMaxFailures) { + // this exception was introduced in hadoop 2.4 and this code would not compile + // with earlier versions if we refer it directly. + if ("org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException" == +e.getClass().getName()) { +logError("Exception from Reporter thread.", e) +finish(FinalApplicationStatus.FAILED, ApplicationMaster.EXIT_REPORTER_FAILURE, + e.getMessage) + } else if (!NonFatal(e) || failureCount >= reporterMaxFailures) { finish(FinalApplicationStatus.FAILED, ApplicationMaster.EXIT_REPORTER_FAILURE, "Exception was thrown " + s"$failureCount time(s) from Reporter thread.") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
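The class-name comparison is the key trick: ApplicationAttemptNotFoundException only exists from Hadoop 2.4 on, so referring to the type directly would break builds against older YARN APIs. A hedged sketch of the pattern on its own:

    // Hedged sketch: detect an exception type that may not be on the
    // compile-time classpath by comparing the runtime class name, rather than
    // writing a `case e: ApplicationAttemptNotFoundException` clause.
    def isApplicationAttemptNotFound(e: Throwable): Boolean =
      e.getClass.getName ==
        "org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException"

When the reporter thread sees such an exception from allocate, the ApplicationMaster finishes with EXIT_REPORTER_FAILURE immediately instead of retrying up to reporterMaxFailures times.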
spark git commit: [SPARK-10477][SQL] using DSL in ColumnPruningSuite to improve readability
Repository: spark Updated Branches: refs/heads/branch-1.6 93095eb29 -> fb08f7b78 [SPARK-10477][SQL] using DSL in ColumnPruningSuite to improve readability Author: Wenchen Fan Closes #8645 from cloud-fan/test. (cherry picked from commit a89e8b6122ee5a1517fbcf405b1686619db56696) Signed-off-by: Andrew Or Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fb08f7b7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fb08f7b7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fb08f7b7 Branch: refs/heads/branch-1.6 Commit: fb08f7b784bc8b5e0cd110f315f72c7d9fc65e08 Parents: 93095eb Author: Wenchen Fan Authored: Tue Dec 15 18:29:19 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:29:25 2015 -0800 -- .../apache/spark/sql/catalyst/dsl/package.scala | 7 ++-- .../catalyst/optimizer/ColumnPruningSuite.scala | 41 +++- 2 files changed, 27 insertions(+), 21 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fb08f7b7/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index af594c2..e509711 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -275,13 +275,14 @@ package object dsl { def unionAll(otherPlan: LogicalPlan): LogicalPlan = Union(logicalPlan, otherPlan) - // TODO specify the output column names def generate( generator: Generator, join: Boolean = false, outer: Boolean = false, -alias: Option[String] = None): LogicalPlan = -Generate(generator, join = join, outer = outer, alias, Nil, logicalPlan) +alias: Option[String] = None, +outputNames: Seq[String] = Nil): LogicalPlan = +Generate(generator, join = join, outer = outer, alias, + outputNames.map(UnresolvedAttribute(_)), logicalPlan) def insertInto(tableName: String, overwrite: Boolean = false): LogicalPlan = InsertIntoTable( http://git-wip-us.apache.org/repos/asf/spark/blob/fb08f7b7/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala index 4a1e7ce..9bf61ae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions.Explode import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{Project, LocalRelation, Generate, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ @@ -35,12 +35,11 @@ class ColumnPruningSuite extends PlanTest { test("Column pruning for Generate when Generate.join = false") { val input = LocalRelation('a.int, 'b.array(StringType)) -val query = Generate(Explode('b), false, false, None, 's.string :: Nil, input).analyze +val query = input.generate(Explode('b), join = false).analyze + val 
optimized = Optimize.execute(query) -val correctAnswer = - Generate(Explode('b), false, false, None, 's.string :: Nil, -Project('b.attr :: Nil, input)).analyze +val correctAnswer = input.select('b).generate(Explode('b), join = false).analyze comparePlans(optimized, correctAnswer) } @@ -49,16 +48,19 @@ class ColumnPruningSuite extends PlanTest { val input = LocalRelation('a.int, 'b.int, 'c.array(StringType)) val query = - Project(Seq('a, 's), -Generate(Explode('c), true, false, None, 's.string :: Nil, - input)).analyze + input +.generate(Explode('c), join = true, outputNames = "explode" :: Nil) +.select('a, 'explode) +.analyze + val optimized = Optimize.execute(query) val correctAnswer = - Project(Seq('a, 's), -Generate(Explode('c), true, false, None, 's.string :: Nil, - Project(Seq('a, 'c), -input))).analyze + input +
spark git commit: [SPARK-10477][SQL] using DSL in ColumnPruningSuite to improve readability
Repository: spark Updated Branches: refs/heads/master c5b6b398d -> a89e8b612 [SPARK-10477][SQL] using DSL in ColumnPruningSuite to improve readability Author: Wenchen Fan Closes #8645 from cloud-fan/test. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a89e8b61 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a89e8b61 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a89e8b61 Branch: refs/heads/master Commit: a89e8b6122ee5a1517fbcf405b1686619db56696 Parents: c5b6b39 Author: Wenchen Fan Authored: Tue Dec 15 18:29:19 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:29:19 2015 -0800 -- .../apache/spark/sql/catalyst/dsl/package.scala | 7 ++-- .../catalyst/optimizer/ColumnPruningSuite.scala | 41 +++- 2 files changed, 27 insertions(+), 21 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a89e8b61/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index af594c2..e509711 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -275,13 +275,14 @@ package object dsl { def unionAll(otherPlan: LogicalPlan): LogicalPlan = Union(logicalPlan, otherPlan) - // TODO specify the output column names def generate( generator: Generator, join: Boolean = false, outer: Boolean = false, -alias: Option[String] = None): LogicalPlan = -Generate(generator, join = join, outer = outer, alias, Nil, logicalPlan) +alias: Option[String] = None, +outputNames: Seq[String] = Nil): LogicalPlan = +Generate(generator, join = join, outer = outer, alias, + outputNames.map(UnresolvedAttribute(_)), logicalPlan) def insertInto(tableName: String, overwrite: Boolean = false): LogicalPlan = InsertIntoTable( http://git-wip-us.apache.org/repos/asf/spark/blob/a89e8b61/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala index 4a1e7ce..9bf61ae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions.Explode import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{Project, LocalRelation, Generate, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ @@ -35,12 +35,11 @@ class ColumnPruningSuite extends PlanTest { test("Column pruning for Generate when Generate.join = false") { val input = LocalRelation('a.int, 'b.array(StringType)) -val query = Generate(Explode('b), false, false, None, 's.string :: Nil, input).analyze +val query = input.generate(Explode('b), join = false).analyze + val optimized = Optimize.execute(query) -val correctAnswer = - Generate(Explode('b), false, false, None, 
's.string :: Nil, -Project('b.attr :: Nil, input)).analyze +val correctAnswer = input.select('b).generate(Explode('b), join = false).analyze comparePlans(optimized, correctAnswer) } @@ -49,16 +48,19 @@ class ColumnPruningSuite extends PlanTest { val input = LocalRelation('a.int, 'b.int, 'c.array(StringType)) val query = - Project(Seq('a, 's), -Generate(Explode('c), true, false, None, 's.string :: Nil, - input)).analyze + input +.generate(Explode('c), join = true, outputNames = "explode" :: Nil) +.select('a, 'explode) +.analyze + val optimized = Optimize.execute(query) val correctAnswer = - Project(Seq('a, 's), -Generate(Explode('c), true, false, None, 's.string :: Nil, - Project(Seq('a, 'c), -input))).analyze + input +.select('a, 'c) +.generate(Explode('c), join = true, outputNames = "explode" :: Nil) +
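A hedged sketch of the extended DSL in isolation, mirroring the reworked test above (imports shown for completeness; this is an illustration, not the full suite):

    import org.apache.spark.sql.catalyst.dsl.expressions._
    import org.apache.spark.sql.catalyst.dsl.plans._
    import org.apache.spark.sql.catalyst.expressions.Explode
    import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
    import org.apache.spark.sql.types.StringType

    // Hedged sketch: generate() can now name its output columns, so the plan
    // reads top to bottom instead of nesting Generate/Project constructors.
    val input = LocalRelation('a.int, 'c.array(StringType))
    val plan = input
      .select('a, 'c)
      .generate(Explode('c), join = true, outputNames = "explode" :: Nil)
      .select('a, 'explode)
      .analyze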
spark git commit: [SPARK-12062][CORE] Change Master to async rebuild UI when application completes
Repository: spark Updated Branches: refs/heads/branch-1.6 8e9a60031 -> 93095eb29 [SPARK-12062][CORE] Change Master to asyc rebuild UI when application completes This change builds the event history of completed apps asynchronously so the RPC thread will not be blocked and allow new workers to register/remove if the event log history is very large and takes a long time to rebuild. Author: Bryan Cutler Closes #10284 from BryanCutler/async-MasterUI-SPARK-12062. (cherry picked from commit c5b6b398d5e368626e589feede80355fb74c2bd8) Signed-off-by: Andrew Or Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/93095eb2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/93095eb2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/93095eb2 Branch: refs/heads/branch-1.6 Commit: 93095eb29a1e59dbdbf6220bfa732b502330e6ae Parents: 8e9a600 Author: Bryan Cutler Authored: Tue Dec 15 18:28:16 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:28:26 2015 -0800 -- .../org/apache/spark/deploy/master/Master.scala | 79 +--- .../spark/deploy/master/MasterMessages.scala| 2 + 2 files changed, 52 insertions(+), 29 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/93095eb2/core/src/main/scala/org/apache/spark/deploy/master/Master.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 1355e1a..fc42bf0 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -21,9 +21,11 @@ import java.io.FileNotFoundException import java.net.URLEncoder import java.text.SimpleDateFormat import java.util.Date -import java.util.concurrent.{ScheduledFuture, TimeUnit} +import java.util.concurrent.{ConcurrentHashMap, ScheduledFuture, TimeUnit} import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} +import scala.concurrent.duration.Duration +import scala.concurrent.{Await, ExecutionContext, Future} import scala.language.postfixOps import scala.util.Random @@ -56,6 +58,10 @@ private[deploy] class Master( private val forwardMessageThread = ThreadUtils.newDaemonSingleThreadScheduledExecutor("master-forward-message-thread") + private val rebuildUIThread = +ThreadUtils.newDaemonSingleThreadExecutor("master-rebuild-ui-thread") + private val rebuildUIContext = ExecutionContext.fromExecutor(rebuildUIThread) + private val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) private def createDateFormat = new SimpleDateFormat("MMddHHmmss") // For application IDs @@ -78,7 +84,8 @@ private[deploy] class Master( private val addressToApp = new HashMap[RpcAddress, ApplicationInfo] private val completedApps = new ArrayBuffer[ApplicationInfo] private var nextAppNumber = 0 - private val appIdToUI = new HashMap[String, SparkUI] + // Using ConcurrentHashMap so that master-rebuild-ui-thread can add a UI after asyncRebuildUI + private val appIdToUI = new ConcurrentHashMap[String, SparkUI] private val drivers = new HashSet[DriverInfo] private val completedDrivers = new ArrayBuffer[DriverInfo] @@ -191,6 +198,7 @@ private[deploy] class Master( checkForWorkerTimeOutTask.cancel(true) } forwardMessageThread.shutdownNow() +rebuildUIThread.shutdownNow() webUi.stop() restServer.foreach(_.stop()) masterMetricsSystem.stop() @@ -367,6 +375,10 @@ private[deploy] class Master( case CheckForWorkerTimeOut => { timeOutDeadWorkers() } + +case AttachCompletedRebuildUI(appId) => + // 
An asyncRebuildSparkUI has completed, so need to attach to master webUi + Option(appIdToUI.get(appId)).foreach { ui => webUi.attachSparkUI(ui) } } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { @@ -809,7 +821,7 @@ private[deploy] class Master( if (completedApps.size >= RETAINED_APPLICATIONS) { val toRemove = math.max(RETAINED_APPLICATIONS / 10, 1) completedApps.take(toRemove).foreach( a => { - appIdToUI.remove(a.id).foreach { ui => webUi.detachSparkUI(ui) } + Option(appIdToUI.remove(a.id)).foreach { ui => webUi.detachSparkUI(ui) } applicationMetricsSystem.removeSource(a.appSource) }) completedApps.trimStart(toRemove) @@ -818,7 +830,7 @@ private[deploy] class Master( waitingApps -= app // If application events are logged, use them to rebuild the UI - rebuildSparkUI(app) + asyncRebuildSparkUI(app) for (exec <- app.executors.values) { killExecutor(exec) @@ -923,49 +935,57 @@ private[deploy] c
spark git commit: [SPARK-12062][CORE] Change Master to async rebuild UI when application completes
Repository: spark Updated Branches: refs/heads/master 8a215d233 -> c5b6b398d [SPARK-12062][CORE] Change Master to asyc rebuild UI when application completes This change builds the event history of completed apps asynchronously so the RPC thread will not be blocked and allow new workers to register/remove if the event log history is very large and takes a long time to rebuild. Author: Bryan Cutler Closes #10284 from BryanCutler/async-MasterUI-SPARK-12062. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c5b6b398 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c5b6b398 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c5b6b398 Branch: refs/heads/master Commit: c5b6b398d5e368626e589feede80355fb74c2bd8 Parents: 8a215d2 Author: Bryan Cutler Authored: Tue Dec 15 18:28:16 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:28:16 2015 -0800 -- .../org/apache/spark/deploy/master/Master.scala | 79 +--- .../spark/deploy/master/MasterMessages.scala| 2 + 2 files changed, 52 insertions(+), 29 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c5b6b398/core/src/main/scala/org/apache/spark/deploy/master/Master.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 1355e1a..fc42bf0 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -21,9 +21,11 @@ import java.io.FileNotFoundException import java.net.URLEncoder import java.text.SimpleDateFormat import java.util.Date -import java.util.concurrent.{ScheduledFuture, TimeUnit} +import java.util.concurrent.{ConcurrentHashMap, ScheduledFuture, TimeUnit} import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} +import scala.concurrent.duration.Duration +import scala.concurrent.{Await, ExecutionContext, Future} import scala.language.postfixOps import scala.util.Random @@ -56,6 +58,10 @@ private[deploy] class Master( private val forwardMessageThread = ThreadUtils.newDaemonSingleThreadScheduledExecutor("master-forward-message-thread") + private val rebuildUIThread = +ThreadUtils.newDaemonSingleThreadExecutor("master-rebuild-ui-thread") + private val rebuildUIContext = ExecutionContext.fromExecutor(rebuildUIThread) + private val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) private def createDateFormat = new SimpleDateFormat("MMddHHmmss") // For application IDs @@ -78,7 +84,8 @@ private[deploy] class Master( private val addressToApp = new HashMap[RpcAddress, ApplicationInfo] private val completedApps = new ArrayBuffer[ApplicationInfo] private var nextAppNumber = 0 - private val appIdToUI = new HashMap[String, SparkUI] + // Using ConcurrentHashMap so that master-rebuild-ui-thread can add a UI after asyncRebuildUI + private val appIdToUI = new ConcurrentHashMap[String, SparkUI] private val drivers = new HashSet[DriverInfo] private val completedDrivers = new ArrayBuffer[DriverInfo] @@ -191,6 +198,7 @@ private[deploy] class Master( checkForWorkerTimeOutTask.cancel(true) } forwardMessageThread.shutdownNow() +rebuildUIThread.shutdownNow() webUi.stop() restServer.foreach(_.stop()) masterMetricsSystem.stop() @@ -367,6 +375,10 @@ private[deploy] class Master( case CheckForWorkerTimeOut => { timeOutDeadWorkers() } + +case AttachCompletedRebuildUI(appId) => + // An asyncRebuildSparkUI has completed, so need to attach to master webUi + 
Option(appIdToUI.get(appId)).foreach { ui => webUi.attachSparkUI(ui) } } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { @@ -809,7 +821,7 @@ private[deploy] class Master( if (completedApps.size >= RETAINED_APPLICATIONS) { val toRemove = math.max(RETAINED_APPLICATIONS / 10, 1) completedApps.take(toRemove).foreach( a => { - appIdToUI.remove(a.id).foreach { ui => webUi.detachSparkUI(ui) } + Option(appIdToUI.remove(a.id)).foreach { ui => webUi.detachSparkUI(ui) } applicationMetricsSystem.removeSource(a.appSource) }) completedApps.trimStart(toRemove) @@ -818,7 +830,7 @@ private[deploy] class Master( waitingApps -= app // If application events are logged, use them to rebuild the UI - rebuildSparkUI(app) + asyncRebuildSparkUI(app) for (exec <- app.executors.values) { killExecutor(exec) @@ -923,49 +935,57 @@ private[deploy] class Master( * Return the UI if successful, else None */ private[master] def rebuildSparkUI(a
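The essence of the change is to move the expensive event-log replay off the RPC thread onto a dedicated executor and report back with a message once the UI is built. A hedged sketch of that shape (names are illustrative, not the full Master code):

    import java.util.concurrent.Executors
    import scala.concurrent.{ExecutionContext, Future}

    // Hedged sketch of the async-rebuild pattern; not the actual Master class.
    val rebuildUIThread = Executors.newSingleThreadExecutor()
    implicit val rebuildUIContext: ExecutionContext =
      ExecutionContext.fromExecutor(rebuildUIThread)

    def asyncRebuildSparkUI(appId: String): Future[Unit] = Future {
      // replay the application's event log and construct its SparkUI here,
      // then notify the master endpoint so the RPC thread attaches it:
      //   self.send(AttachCompletedRebuildUI(appId))
    }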
spark git commit: [SPARK-9886][CORE] Fix to use ShutdownHookManager in ExternalBlockStore.scala
Repository: spark Updated Branches: refs/heads/master 63ccdef81 -> 8a215d233 [SPARK-9886][CORE] Fix to use ShutdownHookManager in ExternalBlockStore.scala Author: Naveen Closes #10313 from naveenminchu/branch-fix-SPARK-9886. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8a215d23 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8a215d23 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8a215d23 Branch: refs/heads/master Commit: 8a215d2338c6286253e20122640592f9d69896c8 Parents: 63ccdef Author: Naveen Authored: Tue Dec 15 18:25:22 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:25:22 2015 -0800 -- .../apache/spark/storage/ExternalBlockStore.scala | 16 +--- 1 file changed, 5 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8a215d23/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala -- diff --git a/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala b/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala index db965d5..94883a5 100644 --- a/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala @@ -22,7 +22,7 @@ import java.nio.ByteBuffer import scala.util.control.NonFatal import org.apache.spark.Logging -import org.apache.spark.util.Utils +import org.apache.spark.util.{ShutdownHookManager, Utils} /** @@ -177,15 +177,6 @@ private[spark] class ExternalBlockStore(blockManager: BlockManager, executorId: } } - private def addShutdownHook() { -Runtime.getRuntime.addShutdownHook(new Thread("ExternalBlockStore shutdown hook") { - override def run(): Unit = Utils.logUncaughtExceptions { -logDebug("Shutdown hook called") -externalBlockManager.map(_.shutdown()) - } -}) - } - // Create concrete block manager and fall back to Tachyon by default for backward compatibility. private def createBlkManager(): Option[ExternalBlockManager] = { val clsName = blockManager.conf.getOption(ExternalBlockStore.BLOCK_MANAGER_NAME) @@ -196,7 +187,10 @@ private[spark] class ExternalBlockStore(blockManager: BlockManager, executorId: .newInstance() .asInstanceOf[ExternalBlockManager] instance.init(blockManager, executorId) - addShutdownHook(); + ShutdownHookManager.addShutdownHook { () => +logDebug("Shutdown hook called") +externalBlockManager.map(_.shutdown()) + } Some(instance) } catch { case NonFatal(t) => - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
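The switch replaces a raw JVM shutdown thread with Spark's own hook registry, which takes a plain function and runs hooks in a managed order. A hedged sketch of the registration call (ShutdownHookManager is private[spark], so this only applies inside Spark's own source tree):

    import org.apache.spark.util.ShutdownHookManager

    // Hedged sketch: register a function literal instead of subclassing Thread.
    ShutdownHookManager.addShutdownHook { () =>
      // release external-block-store resources here
    }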
spark git commit: [SPARK-9886][CORE] Fix to use ShutdownHookManager in ExternalBlockStore.scala
Repository: spark Updated Branches: refs/heads/branch-1.6 2c324d35a -> 8e9a60031 [SPARK-9886][CORE] Fix to use ShutdownHookManager in ExternalBlockStore.scala Author: Naveen Closes #10313 from naveenminchu/branch-fix-SPARK-9886. (cherry picked from commit 8a215d2338c6286253e20122640592f9d69896c8) Signed-off-by: Andrew Or Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8e9a6003 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8e9a6003 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8e9a6003 Branch: refs/heads/branch-1.6 Commit: 8e9a600313f3047139d3cebef85acc782903123b Parents: 2c324d3 Author: Naveen Authored: Tue Dec 15 18:25:22 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:25:28 2015 -0800 -- .../apache/spark/storage/ExternalBlockStore.scala | 16 +--- 1 file changed, 5 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8e9a6003/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala -- diff --git a/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala b/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala index db965d5..94883a5 100644 --- a/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala @@ -22,7 +22,7 @@ import java.nio.ByteBuffer import scala.util.control.NonFatal import org.apache.spark.Logging -import org.apache.spark.util.Utils +import org.apache.spark.util.{ShutdownHookManager, Utils} /** @@ -177,15 +177,6 @@ private[spark] class ExternalBlockStore(blockManager: BlockManager, executorId: } } - private def addShutdownHook() { -Runtime.getRuntime.addShutdownHook(new Thread("ExternalBlockStore shutdown hook") { - override def run(): Unit = Utils.logUncaughtExceptions { -logDebug("Shutdown hook called") -externalBlockManager.map(_.shutdown()) - } -}) - } - // Create concrete block manager and fall back to Tachyon by default for backward compatibility. private def createBlkManager(): Option[ExternalBlockManager] = { val clsName = blockManager.conf.getOption(ExternalBlockStore.BLOCK_MANAGER_NAME) @@ -196,7 +187,10 @@ private[spark] class ExternalBlockStore(blockManager: BlockManager, executorId: .newInstance() .asInstanceOf[ExternalBlockManager] instance.init(blockManager, executorId) - addShutdownHook(); + ShutdownHookManager.addShutdownHook { () => +logDebug("Shutdown hook called") +externalBlockManager.map(_.shutdown()) + } Some(instance) } catch { case NonFatal(t) => - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-10123][DEPLOY] Support specifying deploy mode from configuration
Repository: spark Updated Branches: refs/heads/master 765a48849 -> 63ccdef81 [SPARK-10123][DEPLOY] Support specifying deploy mode from configuration Please help to review, thanks a lot. Author: jerryshao Closes #10195 from jerryshao/SPARK-10123. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/63ccdef8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/63ccdef8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/63ccdef8 Branch: refs/heads/master Commit: 63ccdef81329e785807f37b4e918a9247fc70e3c Parents: 765a488 Author: jerryshao Authored: Tue Dec 15 18:24:23 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:24:23 2015 -0800 -- .../spark/deploy/SparkSubmitArguments.scala | 5 ++- .../apache/spark/deploy/SparkSubmitSuite.scala | 41 docs/configuration.md | 15 +-- .../apache/spark/launcher/SparkLauncher.java| 3 ++ .../launcher/SparkSubmitCommandBuilder.java | 7 ++-- 5 files changed, 64 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/63ccdef8/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 18a1c52..915ef81 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -176,7 +176,10 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S packages = Option(packages).orElse(sparkProperties.get("spark.jars.packages")).orNull packagesExclusions = Option(packagesExclusions) .orElse(sparkProperties.get("spark.jars.excludes")).orNull -deployMode = Option(deployMode).orElse(env.get("DEPLOY_MODE")).orNull +deployMode = Option(deployMode) + .orElse(sparkProperties.get("spark.submit.deployMode")) + .orElse(env.get("DEPLOY_MODE")) + .orNull numExecutors = Option(numExecutors) .getOrElse(sparkProperties.get("spark.executor.instances").orNull) keytab = Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull http://git-wip-us.apache.org/repos/asf/spark/blob/63ccdef8/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index d494b0c..2626f5a 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -136,6 +136,47 @@ class SparkSubmitSuite appArgs.childArgs should be (Seq("--master", "local", "some", "--weird", "args")) } + test("specify deploy mode through configuration") { +val clArgs = Seq( + "--master", "yarn", + "--conf", "spark.submit.deployMode=client", + "--class", "org.SomeClass", + "thejar.jar" +) +val appArgs = new SparkSubmitArguments(clArgs) +val (_, _, sysProps, _) = prepareSubmitEnvironment(appArgs) + +appArgs.deployMode should be ("client") +sysProps("spark.submit.deployMode") should be ("client") + +// Both cmd line and configuration are specified, cmdline option takes the priority +val clArgs1 = Seq( + "--master", "yarn", + "--deploy-mode", "cluster", + "--conf", "spark.submit.deployMode=client", + "-class", "org.SomeClass", + "thejar.jar" +) +val appArgs1 = new SparkSubmitArguments(clArgs1) +val (_, _, sysProps1, _) = prepareSubmitEnvironment(appArgs1) + +appArgs1.deployMode should be 
("cluster") +sysProps1("spark.submit.deployMode") should be ("cluster") + +// Neither cmdline nor configuration are specified, client mode is the default choice +val clArgs2 = Seq( + "--master", "yarn", + "--class", "org.SomeClass", + "thejar.jar" +) +val appArgs2 = new SparkSubmitArguments(clArgs2) +appArgs2.deployMode should be (null) + +val (_, _, sysProps2, _) = prepareSubmitEnvironment(appArgs2) +appArgs2.deployMode should be ("client") +sysProps2("spark.submit.deployMode") should be ("client") + } + test("handles YARN cluster mode") { val clArgs = Seq( "--deploy-mode", "cluster", http://git-wip-us.apache.org/repos/asf/spark/blob/63ccdef8/docs/configuration.md -- diff --git a/docs/configuration.md b/docs/configuration.md index 55cf4b2..38d3d05 100644 --- a/docs/conf
spark git commit: [SPARK-9026][SPARK-4514] Modifications to JobWaiter, FutureAction, and AsyncRDDActions to support non-blocking operation
Repository: spark Updated Branches: refs/heads/master a63d9edcf -> 765a48849 [SPARK-9026][SPARK-4514] Modifications to JobWaiter, FutureAction, and AsyncRDDActions to support non-blocking operation These changes rework the implementations of `SimpleFutureAction`, `ComplexFutureAction`, `JobWaiter`, and `AsyncRDDActions` such that asynchronous callbacks on the generated `Futures` NEVER block waiting for a job to complete. A small amount of mutex synchronization is necessary to protect the internal fields that manage cancellation, but these locks are only held very briefly and in practice should almost never cause any blocking to occur. The existing blocking APIs of these classes are retained, but they simply delegate to the underlying non-blocking API and `Await` the results with indefinite timeouts. Associated JIRA ticket: https://issues.apache.org/jira/browse/SPARK-9026 Also fixes: https://issues.apache.org/jira/browse/SPARK-4514 This pull request contains all my own original work, which I release to the Spark project under its open source license. Author: Richard W. Eggert II Closes #9264 from reggert/fix-futureaction. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/765a4884 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/765a4884 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/765a4884 Branch: refs/heads/master Commit: 765a488494dac0ed38d2b81742c06467b79d96b2 Parents: a63d9ed Author: Richard W. Eggert II Authored: Tue Dec 15 18:22:58 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:22:58 2015 -0800 -- .../scala/org/apache/spark/FutureAction.scala | 164 +++ .../org/apache/spark/rdd/AsyncRDDActions.scala | 48 +++--- .../apache/spark/scheduler/DAGScheduler.scala | 8 +- .../org/apache/spark/scheduler/JobWaiter.scala | 48 +++--- .../test/scala/org/apache/spark/Smuggle.scala | 82 ++ .../org/apache/spark/StatusTrackerSuite.scala | 26 +++ .../apache/spark/rdd/AsyncRDDActionsSuite.scala | 33 +++- 7 files changed, 251 insertions(+), 158 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/765a4884/core/src/main/scala/org/apache/spark/FutureAction.scala -- diff --git a/core/src/main/scala/org/apache/spark/FutureAction.scala b/core/src/main/scala/org/apache/spark/FutureAction.scala index 48792a9..2a8220f 100644 --- a/core/src/main/scala/org/apache/spark/FutureAction.scala +++ b/core/src/main/scala/org/apache/spark/FutureAction.scala @@ -20,13 +20,15 @@ package org.apache.spark import java.util.Collections import java.util.concurrent.TimeUnit +import scala.concurrent._ +import scala.concurrent.duration.Duration +import scala.util.Try + +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.api.java.JavaFutureAction import org.apache.spark.rdd.RDD -import org.apache.spark.scheduler.{JobFailed, JobSucceeded, JobWaiter} +import org.apache.spark.scheduler.JobWaiter -import scala.concurrent._ -import scala.concurrent.duration.Duration -import scala.util.{Failure, Try} /** * A future for the result of an action to support cancellation. This is an extension of the @@ -105,6 +107,7 @@ trait FutureAction[T] extends Future[T] { * A [[FutureAction]] holding the result of an action that triggers a single job. Examples include * count, collect, reduce. 
*/ +@DeveloperApi class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], resultFunc: => T) extends FutureAction[T] { @@ -116,142 +119,96 @@ class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], resultFunc: } override def ready(atMost: Duration)(implicit permit: CanAwait): SimpleFutureAction.this.type = { -if (!atMost.isFinite()) { - awaitResult() -} else jobWaiter.synchronized { - val finishTime = System.currentTimeMillis() + atMost.toMillis - while (!isCompleted) { -val time = System.currentTimeMillis() -if (time >= finishTime) { - throw new TimeoutException -} else { - jobWaiter.wait(finishTime - time) -} - } -} +jobWaiter.completionFuture.ready(atMost) this } @throws(classOf[Exception]) override def result(atMost: Duration)(implicit permit: CanAwait): T = { -ready(atMost)(permit) -awaitResult() match { - case scala.util.Success(res) => res - case scala.util.Failure(e) => throw e -} +jobWaiter.completionFuture.ready(atMost) +assert(value.isDefined, "Future has not completed properly") +value.get.get } override def onComplete[U](func: (Try[T]) => U)(implicit executor: ExecutionContext) { -executor.execute(new Runnable { - override def run() { -func(awaitRes
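The practical consequence is that callbacks on async RDD actions no longer tie up a thread waiting for the job. A hedged sketch of the non-blocking usage this enables (assumes an existing SparkContext `sc`):

    import scala.concurrent.ExecutionContext.Implicits.global
    import scala.util.{Failure, Success}

    // Hedged sketch: register a callback and let the job finish in the background.
    val future = sc.parallelize(1 to 1000, 4).countAsync()
    future.onComplete {
      case Success(count) => println(s"counted $count elements")
      case Failure(e)     => println(s"job failed: $e")
    }
    // future.cancel() remains available if the job should be aborted.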
spark git commit: [SPARK-9516][UI] Improvement of Thread Dump Page
Repository: spark Updated Branches: refs/heads/master c2de99a7c -> a63d9edcf [SPARK-9516][UI] Improvement of Thread Dump Page https://issues.apache.org/jira/browse/SPARK-9516 - [x] new look of Thread Dump Page - [x] click column title to sort - [x] grep - [x] search as you type squito JoshRosen It's ready for the review now Author: CodingCat Closes #7910 from CodingCat/SPARK-9516. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a63d9edc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a63d9edc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a63d9edc Branch: refs/heads/master Commit: a63d9edcfb8a714a17492517927aa114dea8fea0 Parents: c2de99a Author: CodingCat Authored: Tue Dec 15 18:21:00 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:21:00 2015 -0800 -- .../org/apache/spark/ui/static/sorttable.js | 6 +- .../org/apache/spark/ui/static/table.js | 72 +++ .../org/apache/spark/ui/static/webui.css| 10 ++- .../spark/ui/exec/ExecutorThreadDumpPage.scala | 73 +++- 4 files changed, 118 insertions(+), 43 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a63d9edc/core/src/main/resources/org/apache/spark/ui/static/sorttable.js -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js index a73d9a5..ff24147 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js +++ b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js @@ -169,7 +169,7 @@ sorttable = { for (var i=0; ihttp://git-wip-us.apache.org/repos/asf/spark/blob/a63d9edc/core/src/main/resources/org/apache/spark/ui/static/table.js -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/table.js b/core/src/main/resources/org/apache/spark/ui/static/table.js index 656147e..14b06bf 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/table.js +++ b/core/src/main/resources/org/apache/spark/ui/static/table.js @@ -30,3 +30,75 @@ function stripeSummaryTable() { } }); } + +function toggleThreadStackTrace(threadId, forceAdd) { +var stackTrace = $("#" + threadId + "_stacktrace") +if (stackTrace.length == 0) { +var stackTraceText = $('#' + threadId + "_td_stacktrace").html() +var threadCell = $("#thread_" + threadId + "_tr") +threadCell.after("" + +stackTraceText + "") +} else { +if (!forceAdd) { +stackTrace.remove() +} +} +} + +function expandAllThreadStackTrace(toggleButton) { +$('.accordion-heading').each(function() { +//get thread ID +if (!$(this).hasClass("hidden")) { +var trId = $(this).attr('id').match(/thread_([0-9]+)_tr/m)[1] +toggleThreadStackTrace(trId, true) +} +}) +if (toggleButton) { +$('.expandbutton').toggleClass('hidden') +} +} + +function collapseAllThreadStackTrace(toggleButton) { +$('.accordion-body').each(function() { +$(this).remove() +}) +if (toggleButton) { +$('.expandbutton').toggleClass('hidden'); +} +} + + +// inOrOut - true: over, false: out +function onMouseOverAndOut(threadId) { +$("#" + threadId + "_td_id").toggleClass("threaddump-td-mouseover"); +$("#" + threadId + "_td_name").toggleClass("threaddump-td-mouseover"); +$("#" + threadId + "_td_state").toggleClass("threaddump-td-mouseover"); +} + +function onSearchStringChange() { +var searchString = $('#search').val().toLowerCase(); +//remove the stacktrace +collapseAllThreadStackTrace(false) +if (searchString.length == 0) { +$('tr').each(function() { +$(this).removeClass('hidden') +}) +} else { +$('tr').each(function(){ 
+if($(this).attr('id') && $(this).attr('id').match(/thread_[0-9]+_tr/) ) { +var children = $(this).children() +var found = false +for (i = 0; i < children.length; i++) { +if (children.eq(i).text().toLowerCase().indexOf(searchString) >= 0) { +found = true +} +} +if (found) { +$(this).removeClass('hidden') +} else { +$(this).addClass('hidden') +} +} +}); +} +} http://git-wip-us.apache.org/repos/asf/spark/blob/a63d9edc/core/src/main/resources/org/apache/spark/ui/static/webui.css -- diff --git a/core/src/mai
spark git commit: [SPARK-12351][MESOS] Add documentation about submitting Spark with mesos cluster mode.
Repository: spark Updated Branches: refs/heads/branch-1.6 9e4ac5645 -> 2c324d35a [SPARK-12351][MESOS] Add documentation about submitting Spark with mesos cluster mode. Adding more documentation about submitting jobs with mesos cluster mode. Author: Timothy Chen Closes #10086 from tnachen/mesos_supervise_docs. (cherry picked from commit c2de99a7c3a52b0da96517c7056d2733ef45495f) Signed-off-by: Andrew Or Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2c324d35 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2c324d35 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2c324d35 Branch: refs/heads/branch-1.6 Commit: 2c324d35a698b353c2193e2f9bd8ba08c741c548 Parents: 9e4ac56 Author: Timothy Chen Authored: Tue Dec 15 18:20:00 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:20:09 2015 -0800 -- docs/running-on-mesos.md| 26 +- docs/submitting-applications.md | 15 ++- 2 files changed, 35 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2c324d35/docs/running-on-mesos.md -- diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index a197d0e..3193e17 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -150,14 +150,30 @@ it does not need to be redundantly passed in as a system property. Spark on Mesos also supports cluster mode, where the driver is launched in the cluster and the client can find the results of the driver from the Mesos Web UI. -To use cluster mode, you must start the MesosClusterDispatcher in your cluster via the `sbin/start-mesos-dispatcher.sh` script, -passing in the Mesos master url (e.g: mesos://host:5050). +To use cluster mode, you must start the `MesosClusterDispatcher` in your cluster via the `sbin/start-mesos-dispatcher.sh` script, +passing in the Mesos master URL (e.g: mesos://host:5050). This starts the `MesosClusterDispatcher` as a daemon running on the host. -From the client, you can submit a job to Mesos cluster by running `spark-submit` and specifying the master url -to the url of the MesosClusterDispatcher (e.g: mesos://dispatcher:7077). You can view driver statuses on the +If you like to run the `MesosClusterDispatcher` with Marathon, you need to run the `MesosClusterDispatcher` in the foreground (i.e: `bin/spark-class org.apache.spark.deploy.mesos.MesosClusterDispatcher`). + +From the client, you can submit a job to Mesos cluster by running `spark-submit` and specifying the master URL +to the URL of the `MesosClusterDispatcher` (e.g: mesos://dispatcher:7077). You can view driver statuses on the Spark cluster Web UI. -Note that jars or python files that are passed to spark-submit should be URIs reachable by Mesos slaves. +For example: +{% highlight bash %} +./bin/spark-submit \ + --class org.apache.spark.examples.SparkPi \ + --master mesos://207.184.161.138:7077 \ + --deploy-mode cluster + --supervise + --executor-memory 20G \ + --total-executor-cores 100 \ + http://path/to/examples.jar \ + 1000 +{% endhighlight %} + + +Note that jars or python files that are passed to spark-submit should be URIs reachable by Mesos slaves, as the Spark driver doesn't automatically upload local jars. 
# Mesos Run Modes http://git-wip-us.apache.org/repos/asf/spark/blob/2c324d35/docs/submitting-applications.md -- diff --git a/docs/submitting-applications.md b/docs/submitting-applications.md index ac2a14e..acbb0f2 100644 --- a/docs/submitting-applications.md +++ b/docs/submitting-applications.md @@ -115,6 +115,18 @@ export HADOOP_CONF_DIR=XXX --master spark://207.184.161.138:7077 \ examples/src/main/python/pi.py \ 1000 + +# Run on a Mesos cluster in cluster deploy mode with supervise +./bin/spark-submit \ + --class org.apache.spark.examples.SparkPi \ + --master mesos://207.184.161.138:7077 \ + --deploy-mode cluster + --supervise + --executor-memory 20G \ + --total-executor-cores 100 \ + http://path/to/examples.jar \ + 1000 + {% endhighlight %} # Master URLs @@ -132,9 +144,10 @@ The master URL passed to Spark can be in one of the following formats: mesos://HOST:PORT Connect to the given Mesos cluster. The port must be whichever one your is configured to use, which is 5050 by default. Or, for a Mesos cluster using ZooKeeper, use mesos://zk:// +To submit with --deploy-mode cluster, the HOST:PORT should be configured to connect to the MesosClusterDispatcher. yarn Connect to a YARN cluster in -client or cluster mode depending on the value of --deploy-mode. +client or cluster mode depending on the value of --deplo
spark git commit: [SPARK-12351][MESOS] Add documentation about submitting Spark with mesos cluster mode.
Repository: spark Updated Branches: refs/heads/master 369127f03 -> c2de99a7c [SPARK-12351][MESOS] Add documentation about submitting Spark with mesos cluster mode. Adding more documentation about submitting jobs with mesos cluster mode. Author: Timothy Chen Closes #10086 from tnachen/mesos_supervise_docs. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c2de99a7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c2de99a7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c2de99a7 Branch: refs/heads/master Commit: c2de99a7c3a52b0da96517c7056d2733ef45495f Parents: 369127f Author: Timothy Chen Authored: Tue Dec 15 18:20:00 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:20:00 2015 -0800 -- docs/running-on-mesos.md| 26 +- docs/submitting-applications.md | 15 ++- 2 files changed, 35 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c2de99a7/docs/running-on-mesos.md -- diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index a197d0e..3193e17 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -150,14 +150,30 @@ it does not need to be redundantly passed in as a system property. Spark on Mesos also supports cluster mode, where the driver is launched in the cluster and the client can find the results of the driver from the Mesos Web UI. -To use cluster mode, you must start the MesosClusterDispatcher in your cluster via the `sbin/start-mesos-dispatcher.sh` script, -passing in the Mesos master url (e.g: mesos://host:5050). +To use cluster mode, you must start the `MesosClusterDispatcher` in your cluster via the `sbin/start-mesos-dispatcher.sh` script, +passing in the Mesos master URL (e.g: mesos://host:5050). This starts the `MesosClusterDispatcher` as a daemon running on the host. -From the client, you can submit a job to Mesos cluster by running `spark-submit` and specifying the master url -to the url of the MesosClusterDispatcher (e.g: mesos://dispatcher:7077). You can view driver statuses on the +If you like to run the `MesosClusterDispatcher` with Marathon, you need to run the `MesosClusterDispatcher` in the foreground (i.e: `bin/spark-class org.apache.spark.deploy.mesos.MesosClusterDispatcher`). + +From the client, you can submit a job to Mesos cluster by running `spark-submit` and specifying the master URL +to the URL of the `MesosClusterDispatcher` (e.g: mesos://dispatcher:7077). You can view driver statuses on the Spark cluster Web UI. -Note that jars or python files that are passed to spark-submit should be URIs reachable by Mesos slaves. +For example: +{% highlight bash %} +./bin/spark-submit \ + --class org.apache.spark.examples.SparkPi \ + --master mesos://207.184.161.138:7077 \ + --deploy-mode cluster + --supervise + --executor-memory 20G \ + --total-executor-cores 100 \ + http://path/to/examples.jar \ + 1000 +{% endhighlight %} + + +Note that jars or python files that are passed to spark-submit should be URIs reachable by Mesos slaves, as the Spark driver doesn't automatically upload local jars. 
# Mesos Run Modes http://git-wip-us.apache.org/repos/asf/spark/blob/c2de99a7/docs/submitting-applications.md -- diff --git a/docs/submitting-applications.md b/docs/submitting-applications.md index ac2a14e..acbb0f2 100644 --- a/docs/submitting-applications.md +++ b/docs/submitting-applications.md @@ -115,6 +115,18 @@ export HADOOP_CONF_DIR=XXX --master spark://207.184.161.138:7077 \ examples/src/main/python/pi.py \ 1000 + +# Run on a Mesos cluster in cluster deploy mode with supervise +./bin/spark-submit \ + --class org.apache.spark.examples.SparkPi \ + --master mesos://207.184.161.138:7077 \ + --deploy-mode cluster + --supervise + --executor-memory 20G \ + --total-executor-cores 100 \ + http://path/to/examples.jar \ + 1000 + {% endhighlight %} # Master URLs @@ -132,9 +144,10 @@ The master URL passed to Spark can be in one of the following formats: mesos://HOST:PORT Connect to the given Mesos cluster. The port must be whichever one your is configured to use, which is 5050 by default. Or, for a Mesos cluster using ZooKeeper, use mesos://zk:// +To submit with --deploy-mode cluster, the HOST:PORT should be configured to connect to the MesosClusterDispatcher. yarn Connect to a YARN cluster in -client or cluster mode depending on the value of --deploy-mode. +client or cluster mode depending on the value of --deploy-mode. The cluster location will be found based on the HADOOP_CONF_DIR or YARN_CONF_DIR varia
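For readers following the two copies of this doc change above: the quoted spark-submit example drives the MesosClusterDispatcher from the shell (note that the `--deploy-mode cluster` and `--supervise` lines in the quoted snippet appear to be missing trailing backslashes). The same submission can also be scripted programmatically. Below is a minimal, hypothetical sketch using the SparkLauncher API; the Spark home, dispatcher URL and jar location are placeholders, and `spark.driver.supervise` / `spark.cores.max` are assumed to be the configuration keys behind `--supervise` / `--total-executor-cores`.

```scala
import org.apache.spark.launcher.SparkLauncher

object SubmitPiToMesosCluster {
  def main(args: Array[String]): Unit = {
    // Placeholders: point these at your own Spark installation, dispatcher and jar.
    val submission = new SparkLauncher()
      .setSparkHome("/opt/spark")
      .setAppResource("http://path/to/examples.jar")
      .setMainClass("org.apache.spark.examples.SparkPi")
      .setMaster("mesos://dispatcher:7077")        // the MesosClusterDispatcher endpoint
      .setDeployMode("cluster")
      .setConf("spark.driver.supervise", "true")   // assumed equivalent of --supervise
      .setConf("spark.executor.memory", "20g")
      .setConf("spark.cores.max", "100")           // assumed equivalent of --total-executor-cores
      .addAppArgs("1000")
      .launch()                                    // spawns spark-submit as a child process

    submission.waitFor()                           // wait for the submission client to exit
  }
}
```

As with the shell command, the application jar must be a URI reachable by the Mesos slaves, since in cluster mode the driver runs inside the cluster and local jars are not uploaded automatically.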
spark git commit: [SPARK-12130] Replace shuffleManagerClass with shortShuffleMgrNames in ExternalShuffleBlockResolver
Repository: spark Updated Branches: refs/heads/master f725b2ec1 -> 369127f03 [SPARK-12130] Replace shuffleManagerClass with shortShuffleMgrNames in ExternalShuffleBlockResolver Replace shuffleManagerClassName with shortShuffleMgrName is to reduce time of string's comparison. and put sort's comparison on the front. cc JoshRosen andrewor14 Author: Lianhui Wang Closes #10131 from lianhuiwang/spark-12130. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/369127f0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/369127f0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/369127f0 Branch: refs/heads/master Commit: 369127f03257e7081d2aa1fc445e773b26f0d5e3 Parents: f725b2e Author: Lianhui Wang Authored: Tue Dec 15 18:16:22 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:17:48 2015 -0800 -- .../main/scala/org/apache/spark/shuffle/ShuffleManager.scala | 4 .../org/apache/spark/shuffle/hash/HashShuffleManager.scala| 2 ++ .../org/apache/spark/shuffle/sort/SortShuffleManager.scala| 2 ++ .../main/scala/org/apache/spark/storage/BlockManager.scala| 2 +- .../spark/network/shuffle/ExternalShuffleBlockResolver.java | 7 +++ .../org/apache/spark/network/sasl/SaslIntegrationSuite.java | 3 +-- .../network/shuffle/ExternalShuffleBlockResolverSuite.java| 6 +++--- .../network/shuffle/ExternalShuffleIntegrationSuite.java | 4 ++-- 8 files changed, 18 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/369127f0/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala index 978366d..a3444bf 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala @@ -28,6 +28,10 @@ import org.apache.spark.{TaskContext, ShuffleDependency} * boolean isDriver as parameters. */ private[spark] trait ShuffleManager { + + /** Return short name for the ShuffleManager */ + val shortName: String + /** * Register a shuffle with the manager and obtain a handle for it to pass to tasks. */ http://git-wip-us.apache.org/repos/asf/spark/blob/369127f0/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala index d2e2fc4..4f30da0 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala @@ -34,6 +34,8 @@ private[spark] class HashShuffleManager(conf: SparkConf) extends ShuffleManager private val fileShuffleBlockResolver = new FileShuffleBlockResolver(conf) + override val shortName: String = "hash" + /* Register a shuffle with the manager and obtain a handle for it to pass to tasks. 
*/ override def registerShuffle[K, V, C]( shuffleId: Int, http://git-wip-us.apache.org/repos/asf/spark/blob/369127f0/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala index 66b6bbc..9b1a279 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala @@ -79,6 +79,8 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager */ private[this] val numMapsForShuffle = new ConcurrentHashMap[Int, Int]() + override val shortName: String = "sort" + override val shuffleBlockResolver = new IndexShuffleBlockResolver(conf) /** http://git-wip-us.apache.org/repos/asf/spark/blob/369127f0/core/src/main/scala/org/apache/spark/storage/BlockManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index ed05143..540e1ec 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -200,7 +200,7 @@ private[spark] class BlockManager( val shuffleConfig = new ExecutorShuffleInfo( diskBlockManager.localDirs.map(
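The change above registers executors with the external shuffle service using a short name ("sort" or "hash") instead of the fully qualified ShuffleManager class name, so the per-block string comparison is cheaper and the common sort-based case is checked first. The snippet below is a stripped-down sketch of that pattern, not the actual Spark classes; the resolver method and file-naming strings are simplified stand-ins.

```scala
// Simplified stand-ins for the real ShuffleManager / ExternalShuffleBlockResolver.
trait ShuffleManager {
  /** Short name reported to the external shuffle service (e.g. "sort", "hash"). */
  val shortName: String
}

class SortShuffleManager extends ShuffleManager { override val shortName: String = "sort" }
class HashShuffleManager extends ShuffleManager { override val shortName: String = "hash" }

object ExternalResolverSketch {
  // Comparing a couple of short, known strings; the default sort-based case comes first.
  def resolve(shuffleMgrName: String, blockId: String): String = shuffleMgrName match {
    case "sort" => s"$blockId resolved via a shuffle index file"
    case "hash" => s"$blockId resolved as a standalone block file"
    case other  => sys.error(s"Unsupported shuffle manager: $other")
  }
}
```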
spark git commit: [SPARK-12056][CORE] Part 2 Create a TaskAttemptContext only after calling setConf
Repository: spark Updated Branches: refs/heads/branch-1.6 08aa3b47e -> 9e4ac5645 [SPARK-12056][CORE] Part 2 Create a TaskAttemptContext only after calling setConf This is continuation of SPARK-12056 where change is applied to SqlNewHadoopRDD.scala andrewor14 FYI Author: tedyu Closes #10164 from tedyu/master. (cherry picked from commit f725b2ec1ab0d89e35b5e2d3ddeddb79fec85f6d) Signed-off-by: Andrew Or Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9e4ac564 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9e4ac564 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9e4ac564 Branch: refs/heads/branch-1.6 Commit: 9e4ac56452710ddd8efb695e69c8de49317e3f28 Parents: 08aa3b4 Author: tedyu Authored: Tue Dec 15 18:15:10 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:15:53 2015 -0800 -- .../apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9e4ac564/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala index 56cb63d..eea780c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala @@ -148,14 +148,14 @@ private[spark] class SqlNewHadoopRDD[V: ClassTag]( } inputMetrics.setBytesReadCallback(bytesReadCallback) - val attemptId = newTaskAttemptID(jobTrackerId, id, isMap = true, split.index, 0) - val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId) val format = inputFormatClass.newInstance format match { case configurable: Configurable => configurable.setConf(conf) case _ => } + val attemptId = newTaskAttemptID(jobTrackerId, id, isMap = true, split.index, 0) + val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId) private[this] var reader: RecordReader[Void, V] = null /** - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-12056][CORE] Part 2 Create a TaskAttemptContext only after calling setConf
Repository: spark Updated Branches: refs/heads/master 840bd2e00 -> f725b2ec1 [SPARK-12056][CORE] Part 2 Create a TaskAttemptContext only after calling setConf This is continuation of SPARK-12056 where change is applied to SqlNewHadoopRDD.scala andrewor14 FYI Author: tedyu Closes #10164 from tedyu/master. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f725b2ec Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f725b2ec Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f725b2ec Branch: refs/heads/master Commit: f725b2ec1ab0d89e35b5e2d3ddeddb79fec85f6d Parents: 840bd2e Author: tedyu Authored: Tue Dec 15 18:15:10 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:15:10 2015 -0800 -- .../apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f725b2ec/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala index 56cb63d..eea780c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala @@ -148,14 +148,14 @@ private[spark] class SqlNewHadoopRDD[V: ClassTag]( } inputMetrics.setBytesReadCallback(bytesReadCallback) - val attemptId = newTaskAttemptID(jobTrackerId, id, isMap = true, split.index, 0) - val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId) val format = inputFormatClass.newInstance format match { case configurable: Configurable => configurable.setConf(conf) case _ => } + val attemptId = newTaskAttemptID(jobTrackerId, id, isMap = true, split.index, 0) + val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId) private[this] var reader: RecordReader[Void, V] = null /** - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
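The hunks above (applied to both master and branch-1.6) make the same point: an InputFormat that implements Configurable may mutate the shared Hadoop Configuration in setConf, so the TaskAttemptContext should be built only after that call, or it can observe a stale or concurrently modified conf. The following is a minimal sketch of that ordering concern, using a hypothetical RewritingInputFormat in place of the real Parquet input format.

```scala
import org.apache.hadoop.conf.{Configurable, Configuration}

// Hypothetical stand-in for an input format that does real work in setConf.
class RewritingInputFormat extends Configurable {
  private var conf: Configuration = _
  override def setConf(c: Configuration): Unit = {
    c.set("read.schema", "resolved-at-configure-time")   // mutates the shared conf
    conf = c
  }
  override def getConf: Configuration = conf
}

object OrderingSketch extends App {
  val conf = new Configuration(false)

  // Anything derived from the conf *before* the format is configured
  // can miss (or race with) what setConf writes.
  val beforeConfigure = new Configuration(conf)

  new RewritingInputFormat().setConf(conf)

  // The patch moves TaskAttemptContext creation after setConf, i.e. to this point.
  val afterConfigure = new Configuration(conf)

  assert(beforeConfigure.get("read.schema") == null)
  assert(afterConfigure.get("read.schema") == "resolved-at-configure-time")
}
```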
spark git commit: [HOTFIX] Compile error from commit 31b3910
Repository: spark Updated Branches: refs/heads/master 31b391019 -> 840bd2e00 [HOTFIX] Compile error from commit 31b3910 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/840bd2e0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/840bd2e0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/840bd2e0 Branch: refs/heads/master Commit: 840bd2e008da5b22bfa73c587ea2c57666fffc60 Parents: 31b3910 Author: Andrew Or Authored: Tue Dec 15 18:11:53 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:11:53 2015 -0800 -- sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/840bd2e0/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index b69d441..33b03be 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -234,7 +234,7 @@ class DataFrame private[sql]( // For Data that has more than "_numRows" records if (hasMoreData) { val rowsString = if (_numRows == 1) "row" else "rows" - sb.append(s"only showing top $_numRows $rowsString\n") + sb.append(s"only showing top ${_numRows} $rowsString\n") } sb.toString() - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
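The one-character fix above reflects a Scala string-interpolation rule worth spelling out: a bare `$ident` reference is only recognized when the identifier effectively starts with a letter, so a name beginning with an underscore (here `_numRows`) must be written with the `${...}` block form. A tiny standalone illustration:

```scala
object InterpolationSketch extends App {
  val _numRows = 20
  // s"only showing top $_numRows rows"          // does not compile: `$_...` is not a valid reference
  val msg = s"only showing top ${_numRows} rows" // the form the hotfix switches to
  println(msg)
}
```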
spark git commit: [SPARK-12105] [SQL] add convenient show functions
Repository: spark Updated Branches: refs/heads/master 28112657e -> 31b391019 [SPARK-12105] [SQL] add convenient show functions Author: Jean-Baptiste Onofré Closes #10130 from jbonofre/SPARK-12105. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/31b39101 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/31b39101 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/31b39101 Branch: refs/heads/master Commit: 31b391019ff6eb5a483f4b3e62fd082de7ff8416 Parents: 2811265 Author: Jean-Baptiste Onofré Authored: Tue Dec 15 18:06:30 2015 -0800 Committer: Andrew Or Committed: Tue Dec 15 18:08:09 2015 -0800 -- .../scala/org/apache/spark/sql/DataFrame.scala | 25 +--- 1 file changed, 16 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/31b39101/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 497bd48..b69d441 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -161,16 +161,23 @@ class DataFrame private[sql]( } /** +* Compose the string representing rows for output +*/ + def showString(): String = { +showString(20) + } + + /** * Compose the string representing rows for output - * @param _numRows Number of rows to show + * @param numRows Number of rows to show * @param truncate Whether truncate long strings and align cells right */ - private[sql] def showString(_numRows: Int, truncate: Boolean = true): String = { -val numRows = _numRows.max(0) + def showString(numRows: Int, truncate: Boolean = true): String = { +val _numRows = numRows.max(0) val sb = new StringBuilder -val takeResult = take(numRows + 1) -val hasMoreData = takeResult.length > numRows -val data = takeResult.take(numRows) +val takeResult = take(_numRows + 1) +val hasMoreData = takeResult.length > _numRows +val data = takeResult.take(_numRows) val numCols = schema.fieldNames.length // For array values, replace Seq and Array with square brackets @@ -224,10 +231,10 @@ class DataFrame private[sql]( sb.append(sep) -// For Data that has more than "numRows" records +// For Data that has more than "_numRows" records if (hasMoreData) { - val rowsString = if (numRows == 1) "row" else "rows" - sb.append(s"only showing top $numRows $rowsString\n") + val rowsString = if (_numRows == 1) "row" else "rows" + sb.append(s"only showing top $_numRows $rowsString\n") } sb.toString() - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
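Per the diff above, showString is no longer private[sql] and gains a no-argument overload that defaults to 20 rows, so callers can capture the tabular rendering as a String instead of having show() print it to stdout. A hedged usage sketch, assuming an existing SQLContext named sqlContext:

```scala
val df = sqlContext.range(0, 100).toDF("id")

val first20: String = df.showString()                    // same rendering show() would print
val first5:  String = df.showString(5, truncate = false) // explicit row count, no truncation

// e.g. forward the rendering to a logger instead of stdout
println(first5)
```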
spark git commit: [SPARK-12236][SQL] JDBC filter tests all pass if filters are not really pushed down
Repository: spark Updated Branches: refs/heads/master 86ea64dd1 -> 28112657e [SPARK-12236][SQL] JDBC filter tests all pass if filters are not really pushed down https://issues.apache.org/jira/browse/SPARK-12236 Currently JDBC filters are not tested properly. All the tests pass even if the filters are not pushed down due to Spark-side filtering. In this PR, Firstly, I corrected the tests to properly check the pushed down filters by removing Spark-side filtering. Also, `!=` was being tested which is actually not pushed down. So I removed them. Lastly, I moved the `stripSparkFilter()` function to `SQLTestUtils` as this functions would be shared for all tests for pushed down filters. This function would be also shared with ORC datasource as the filters for that are also not being tested properly. Author: hyukjinkwon Closes #10221 from HyukjinKwon/SPARK-12236. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/28112657 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/28112657 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/28112657 Branch: refs/heads/master Commit: 28112657ea5919451291c21b4b8e1eb3db0ec8d4 Parents: 86ea64d Author: hyukjinkwon Authored: Tue Dec 15 17:02:14 2015 -0800 Committer: Michael Armbrust Committed: Tue Dec 15 17:02:14 2015 -0800 -- .../datasources/parquet/ParquetFilterSuite.scala | 15 --- .../scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 10 -- .../org/apache/spark/sql/test/SQLTestUtils.scala | 15 +++ 3 files changed, 19 insertions(+), 21 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/28112657/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index daf41bc..6178e37 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -110,21 +110,6 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex checkBinaryFilterPredicate(predicate, filterClass, Seq(Row(expected)))(df) } - /** - * Strip Spark-side filtering in order to check if a datasource filters rows correctly. 
- */ - protected def stripSparkFilter(df: DataFrame): DataFrame = { -val schema = df.schema -val childRDD = df - .queryExecution - .executedPlan.asInstanceOf[org.apache.spark.sql.execution.Filter] - .child - .execute() - .map(row => Row.fromSeq(row.toSeq(schema))) - -sqlContext.createDataFrame(childRDD, schema) - } - test("filter pushdown - boolean") { withParquetDataFrame((true :: false :: Nil).map(b => Tuple1.apply(Option(b { implicit df => checkFilterPredicate('_1.isNull, classOf[Eq[_]], Seq.empty[Row]) http://git-wip-us.apache.org/repos/asf/spark/blob/28112657/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 8c24aa3..a360947 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -176,12 +176,10 @@ class JDBCSuite extends SparkFunSuite with BeforeAndAfter with SharedSQLContext } test("SELECT * WHERE (simple predicates)") { -assert(sql("SELECT * FROM foobar WHERE THEID < 1").collect().size === 0) -assert(sql("SELECT * FROM foobar WHERE THEID != 2").collect().size === 2) -assert(sql("SELECT * FROM foobar WHERE THEID = 1").collect().size === 1) -assert(sql("SELECT * FROM foobar WHERE NAME = 'fred'").collect().size === 1) -assert(sql("SELECT * FROM foobar WHERE NAME > 'fred'").collect().size === 2) -assert(sql("SELECT * FROM foobar WHERE NAME != 'fred'").collect().size === 2) +assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID < 1")).collect().size === 0) +assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID = 1")).collect().size === 1) +assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME = 'fred'")).collect().size === 1) +assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 'fred'")).collect().size === 2) } test("SELECT * WHERE (quoted strings)") { http://git-wip-us.apache.org/repos/asf/spark/bl
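The hunk above removes stripSparkFilter from ParquetFilterSuite because, per the commit message, the same helper now lives in SQLTestUtils where the JDBC (and eventually ORC) suites can share it. The sketch below restates the body shown in the removed lines, wrapped in a hypothetical trait standing in for SQLTestUtils: it peels off Spark's own Filter node so assertions count only the rows the datasource itself returned.

```scala
import org.apache.spark.sql.{DataFrame, Row, SQLContext}

// Hypothetical trait name; the real home is SQLTestUtils per the commit message.
trait PushdownTestHelpers {
  def sqlContext: SQLContext

  /** Strip Spark-side filtering so tests observe only what the datasource returned. */
  protected def stripSparkFilter(df: DataFrame): DataFrame = {
    val schema = df.schema
    val childRDD = df
      .queryExecution
      .executedPlan.asInstanceOf[org.apache.spark.sql.execution.Filter]
      .child                    // the node under Spark's own Filter, i.e. the scan itself
      .execute()
      .map(row => Row.fromSeq(row.toSeq(schema)))

    sqlContext.createDataFrame(childRDD, schema)
  }
}
```

Used as in the JDBC test: if the source really pushed `THEID < 1` down, `stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID < 1")).collect()` is empty even without Spark-side filtering.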
spark git commit: [SPARK-12271][SQL] Improve error message when Dataset.as[ ] has incompatible schemas.
Repository: spark Updated Branches: refs/heads/master b24c12d73 -> 86ea64dd1 [SPARK-12271][SQL] Improve error message when Dataset.as[ ] has incompatible schemas. Author: Nong Li Closes #10260 from nongli/spark-11271. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/86ea64dd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/86ea64dd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/86ea64dd Branch: refs/heads/master Commit: 86ea64dd146757c8f997d05fb5bb44f6aa58515c Parents: b24c12d Author: Nong Li Authored: Tue Dec 15 16:55:58 2015 -0800 Committer: Michael Armbrust Committed: Tue Dec 15 16:55:58 2015 -0800 -- .../org/apache/spark/sql/catalyst/ScalaReflection.scala | 2 +- .../spark/sql/catalyst/encoders/ExpressionEncoder.scala | 1 + .../apache/spark/sql/catalyst/expressions/objects.scala | 12 +++- .../test/scala/org/apache/spark/sql/DatasetSuite.scala | 10 +- 4 files changed, 18 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/86ea64dd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index 9013fd0..ecff860 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -184,7 +184,7 @@ object ScalaReflection extends ScalaReflection { val TypeRef(_, _, Seq(optType)) = t val className = getClassNameFromType(optType) val newTypePath = s"""- option value class: "$className +: walkedTypePath -WrapOption(constructorFor(optType, path, newTypePath)) +WrapOption(constructorFor(optType, path, newTypePath), dataTypeFor(optType)) case t if t <:< localTypeOf[java.lang.Integer] => val boxedType = classOf[java.lang.Integer] http://git-wip-us.apache.org/repos/asf/spark/blob/86ea64dd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala index 3e8420e..363178b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala @@ -251,6 +251,7 @@ case class ExpressionEncoder[T]( val plan = Project(Alias(unbound, "")() :: Nil, LocalRelation(schema)) val analyzedPlan = SimpleAnalyzer.execute(plan) +SimpleAnalyzer.checkAnalysis(analyzedPlan) val optimizedPlan = SimplifyCasts(analyzedPlan) // In order to construct instances of inner classes (for example those declared in a REPL cell), http://git-wip-us.apache.org/repos/asf/spark/blob/86ea64dd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala index 96bc4fe..10ec75e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala @@ -23,11 +23,9 @@ import scala.reflect.ClassTag import org.apache.spark.SparkConf import 
org.apache.spark.serializer._ import org.apache.spark.sql.Row -import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer -import org.apache.spark.sql.catalyst.plans.logical.{Project, LocalRelation} -import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.codegen.{GeneratedExpressionCode, CodeGenContext} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodeGenContext, GeneratedExpressionCode} +import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.types._ /** @@ -295,13 +293,17 @@ case class UnwrapOption( * Converts the result of evaluating `child` into an option, checking both the isNull bit and * (in the case of reference types) equality with null. * @param child The expression to evaluate and wrap. + * @param optType The type of this option. */ -case class WrapOption(child: Expression)
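The patch above runs checkAnalysis over the analyzed constructor expression (and threads the option's element type into WrapOption), so a Dataset whose encoder does not line up with the underlying schema fails during analysis with a pointed message rather than a late, opaque error. Below is a hypothetical repro of the kind of mismatch involved; the exact failure point and message depend on encoder internals, so treat it as a sketch.

```scala
import sqlContext.implicits._   // assumes an existing SQLContext named sqlContext

case class Person(name: String, age: Long)

// The frame carries age as a String, so it cannot be bound to Person.age: Long.
val df = sqlContext
  .createDataFrame(Seq(("alice", "thirty"), ("bob", "forty")))
  .toDF("name", "age")

// Expected (per the DatasetSuite addition in this commit) to fail analysis with a
// message naming the incompatible column, rather than blowing up at collect() time.
val people = df.as[Person]
```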
spark git commit: [MINOR][ML] Rename weights to coefficients for examples/DeveloperApiExample
Repository: spark Updated Branches: refs/heads/master bc1ff9f4a -> b24c12d73 [MINOR][ML] Rename weights to coefficients for examples/DeveloperApiExample Rename ```weights``` to ```coefficients``` for examples/DeveloperApiExample. cc mengxr jkbradley Author: Yanbo Liang Closes #10280 from yanboliang/spark-coefficients. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b24c12d7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b24c12d7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b24c12d7 Branch: refs/heads/master Commit: b24c12d7338b47b637698e7458ba90f34cba28c0 Parents: bc1ff9f Author: Yanbo Liang Authored: Tue Dec 15 16:29:39 2015 -0800 Committer: Joseph K. Bradley Committed: Tue Dec 15 16:29:39 2015 -0800 -- .../examples/ml/JavaDeveloperApiExample.java| 22 ++-- .../spark/examples/ml/DeveloperApiExample.scala | 16 +++--- 2 files changed, 19 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b24c12d7/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java -- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java index 0b4c0d9..b9dd3ad 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java @@ -89,7 +89,7 @@ public class JavaDeveloperApiExample { } if (sumPredictions != 0.0) { throw new Exception("MyJavaLogisticRegression predicted something other than 0," + - " even though all weights are 0!"); + " even though all coefficients are 0!"); } jsc.stop(); @@ -149,12 +149,12 @@ class MyJavaLogisticRegression // Extract columns from data using helper method. JavaRDD oldDataset = extractLabeledPoints(dataset).toJavaRDD(); -// Do learning to estimate the weight vector. +// Do learning to estimate the coefficients vector. int numFeatures = oldDataset.take(1).get(0).features().size(); -Vector weights = Vectors.zeros(numFeatures); // Learning would happen here. +Vector coefficients = Vectors.zeros(numFeatures); // Learning would happen here. // Create a model, and return it. -return new MyJavaLogisticRegressionModel(uid(), weights).setParent(this); +return new MyJavaLogisticRegressionModel(uid(), coefficients).setParent(this); } @Override @@ -173,12 +173,12 @@ class MyJavaLogisticRegression class MyJavaLogisticRegressionModel extends ClassificationModel { - private Vector weights_; - public Vector weights() { return weights_; } + private Vector coefficients_; + public Vector coefficients() { return coefficients_; } - public MyJavaLogisticRegressionModel(String uid, Vector weights) { + public MyJavaLogisticRegressionModel(String uid, Vector coefficients) { this.uid_ = uid; -this.weights_ = weights; +this.coefficients_ = coefficients; } private String uid_ = Identifiable$.MODULE$.randomUID("myJavaLogReg"); @@ -208,7 +208,7 @@ class MyJavaLogisticRegressionModel * modifier. */ public Vector predictRaw(Vector features) { -double margin = BLAS.dot(features, weights_); +double margin = BLAS.dot(features, coefficients_); // There are 2 classes (binary classification), so we return a length-2 vector, // where index i corresponds to class i (i = 0, 1). return Vectors.dense(-margin, margin); @@ -222,7 +222,7 @@ class MyJavaLogisticRegressionModel /** * Number of features the model was trained on. 
*/ - public int numFeatures() { return weights_.size(); } + public int numFeatures() { return coefficients_.size(); } /** * Create a copy of the model. @@ -235,7 +235,7 @@ class MyJavaLogisticRegressionModel */ @Override public MyJavaLogisticRegressionModel copy(ParamMap extra) { -return copyValues(new MyJavaLogisticRegressionModel(uid(), weights_), extra) +return copyValues(new MyJavaLogisticRegressionModel(uid(), coefficients_), extra) .setParent(parent()); } } http://git-wip-us.apache.org/repos/asf/spark/blob/b24c12d7/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala -- diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala index 3758edc..c1f63c6 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala +++ b/examples/src/
[1/2] spark git commit: Preparing Spark release v1.6.0-rc3
Repository: spark Updated Branches: refs/heads/branch-1.6 80d261718 -> 08aa3b47e Preparing Spark release v1.6.0-rc3 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/00a39d9c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/00a39d9c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/00a39d9c Branch: refs/heads/branch-1.6 Commit: 00a39d9c05c55b5ffcd4f49aadc91cedf227669a Parents: 80d2617 Author: Patrick Wendell Authored: Tue Dec 15 15:09:57 2015 -0800 Committer: Patrick Wendell Committed: Tue Dec 15 15:09:57 2015 -0800 -- assembly/pom.xml| 2 +- bagel/pom.xml | 2 +- core/pom.xml| 2 +- docker-integration-tests/pom.xml| 2 +- examples/pom.xml| 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt-assembly/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml| 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 2 +- extras/kinesis-asl-assembly/pom.xml | 2 +- extras/kinesis-asl/pom.xml | 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml | 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml| 2 +- pom.xml | 2 +- repl/pom.xml| 2 +- sql/catalyst/pom.xml| 2 +- sql/core/pom.xml| 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml| 2 +- streaming/pom.xml | 2 +- tags/pom.xml| 2 +- tools/pom.xml | 2 +- unsafe/pom.xml | 2 +- yarn/pom.xml| 2 +- 35 files changed, 35 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/00a39d9c/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 4b60ee0..fbabaa5 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/00a39d9c/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 672e946..1b3e417 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/00a39d9c/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 61744bb..15b8d75 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/00a39d9c/docker-integration-tests/pom.xml -- diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml index 39d3f34..d579879 100644 --- a/docker-integration-tests/pom.xml +++ b/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/00a39d9c/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index f5ab2a7..37b15bb 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/00a39d9c/external/flume-assembly/pom.xml -- diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index dceedcf..295455a 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../../pom.xml 
http://git-wip-us.apache.org/repos/asf/spark/blob/00a39d9c/external/flume-sink/pom.xml -- diff --gi
[spark] Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.6.0-rc3 [created] 00a39d9c0 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[2/2] spark git commit: Preparing development version 1.6.0-SNAPSHOT
Preparing development version 1.6.0-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/08aa3b47 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/08aa3b47 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/08aa3b47 Branch: refs/heads/branch-1.6 Commit: 08aa3b47e6a295a8297e741effa14cd0d834aea8 Parents: 00a39d9 Author: Patrick Wendell Authored: Tue Dec 15 15:10:04 2015 -0800 Committer: Patrick Wendell Committed: Tue Dec 15 15:10:04 2015 -0800 -- assembly/pom.xml| 2 +- bagel/pom.xml | 2 +- core/pom.xml| 2 +- docker-integration-tests/pom.xml| 2 +- examples/pom.xml| 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt-assembly/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml| 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 2 +- extras/kinesis-asl-assembly/pom.xml | 2 +- extras/kinesis-asl/pom.xml | 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml | 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml| 2 +- pom.xml | 2 +- repl/pom.xml| 2 +- sql/catalyst/pom.xml| 2 +- sql/core/pom.xml| 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml| 2 +- streaming/pom.xml | 2 +- tags/pom.xml| 2 +- tools/pom.xml | 2 +- unsafe/pom.xml | 2 +- yarn/pom.xml| 2 +- 35 files changed, 35 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/08aa3b47/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index fbabaa5..4b60ee0 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/08aa3b47/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 1b3e417..672e946 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/08aa3b47/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 15b8d75..61744bb 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/08aa3b47/docker-integration-tests/pom.xml -- diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml index d579879..39d3f34 100644 --- a/docker-integration-tests/pom.xml +++ b/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/08aa3b47/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 37b15bb..f5ab2a7 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/08aa3b47/external/flume-assembly/pom.xml -- diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 295455a..dceedcf 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/08aa3b47/external/flume-sink/pom.xml -- diff --git 
a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 31b907f
[3/3] spark git commit: Update branch-1.6 for 1.6.0 release
Update branch-1.6 for 1.6.0 release Author: Michael Armbrust Closes #10317 from marmbrus/versions. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/80d26171 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/80d26171 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/80d26171 Branch: refs/heads/branch-1.6 Commit: 80d261718c1157e5cd4b0ac27e36ef919ea65afa Parents: 23c8846 Author: Michael Armbrust Authored: Tue Dec 15 15:03:33 2015 -0800 Committer: Michael Armbrust Committed: Tue Dec 15 15:03:33 2015 -0800 -- CHANGES.txt | 34172 + .../main/scala/org/apache/spark/package.scala | 2 +- dev/create-release/generate-changelist.py | 4 +- docs/_config.yml| 2 +- 4 files changed, 34176 insertions(+), 4 deletions(-) -- - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[1/3] spark git commit: Update branch-1.6 for 1.6.0 release
Repository: spark Updated Branches: refs/heads/branch-1.6 23c884605 -> 80d261718 http://git-wip-us.apache.org/repos/asf/spark/blob/80d26171/core/src/main/scala/org/apache/spark/package.scala -- diff --git a/core/src/main/scala/org/apache/spark/package.scala b/core/src/main/scala/org/apache/spark/package.scala index 7515aad..e37bf09 100644 --- a/core/src/main/scala/org/apache/spark/package.scala +++ b/core/src/main/scala/org/apache/spark/package.scala @@ -43,5 +43,5 @@ package org.apache package object spark { // For package docs only - val SPARK_VERSION = "1.6.0-SNAPSHOT" + val SPARK_VERSION = "1.6.0" } http://git-wip-us.apache.org/repos/asf/spark/blob/80d26171/dev/create-release/generate-changelist.py -- diff --git a/dev/create-release/generate-changelist.py b/dev/create-release/generate-changelist.py index 2e1a35a..9798158 100755 --- a/dev/create-release/generate-changelist.py +++ b/dev/create-release/generate-changelist.py @@ -31,8 +31,8 @@ import time import traceback SPARK_HOME = os.environ["SPARK_HOME"] -NEW_RELEASE_VERSION = "1.0.0" -PREV_RELEASE_GIT_TAG = "v0.9.1" +NEW_RELEASE_VERSION = "1.6.0" +PREV_RELEASE_GIT_TAG = "v1.5.2" CHANGELIST = "CHANGES.txt" OLD_CHANGELIST = "%s.old" % (CHANGELIST) http://git-wip-us.apache.org/repos/asf/spark/blob/80d26171/docs/_config.yml -- diff --git a/docs/_config.yml b/docs/_config.yml index 2c70b76..e5fde18 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -14,7 +14,7 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 1.6.0-SNAPSHOT +SPARK_VERSION: 1.6.0 SPARK_VERSION_SHORT: 1.6.0 SCALA_BINARY_VERSION: "2.10" SCALA_VERSION: "2.10.5" - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[2/3] spark git commit: Update branch-1.6 for 1.6.0 release
http://git-wip-us.apache.org/repos/asf/spark/blob/80d26171/CHANGES.txt -- diff --git a/CHANGES.txt b/CHANGES.txt new file mode 100644 index 000..ff59371 --- /dev/null +++ b/CHANGES.txt @@ -0,0 +1,34172 @@ +Spark Change Log + + +Release 1.6.0 + + [STREAMING][MINOR] Fix typo in function name of StateImpl + jerryshao + 2015-12-15 09:41:40 -0800 + Commit: 23c8846, github.com/apache/spark/pull/10305 + + [SPARK-12327] Disable commented code lintr temporarily + Shivaram Venkataraman + 2015-12-14 16:13:55 -0800 + Commit: 352a0c8, github.com/apache/spark/pull/10300 + + [MINOR][DOC] Fix broken word2vec link + BenFradet + 2015-12-14 13:50:30 + + Commit: c0f0f6c, github.com/apache/spark/pull/10282 + + [SPARK-12275][SQL] No plan for BroadcastHint in some condition + yucai + 2015-12-13 23:08:21 -0800 + Commit: 94ce502, github.com/apache/spark/pull/10265 + + [SPARK-12281][CORE] Fix a race condition when reporting ExecutorState in the shutdown hook + Shixiong Zhu + 2015-12-13 22:06:39 -0800 + Commit: fbf16da, github.com/apache/spark/pull/10269 + + [SPARK-12267][CORE] Store the remote RpcEnv address to send the correct disconnetion message + Shixiong Zhu + 2015-12-12 21:58:55 -0800 + Commit: d7e3bfd, github.com/apache/spark/pull/10261 + + [SPARK-12199][DOC] Follow-up: Refine example code in ml-features.md + Xusen Yin + 2015-12-12 17:47:01 -0800 + Commit: e05364b, github.com/apache/spark/pull/10193 + + [SPARK-11193] Use Java ConcurrentHashMap instead of SynchronizedMap trait in order to avoid ClassCastException due to KryoSerializer in KinesisReceiver + Jean-Baptiste Onofré + 2015-12-12 08:51:52 + + Commit: 2679fce, github.com/apache/spark/pull/10203 + + [SPARK-12158][SPARKR][SQL] Fix 'sample' functions that break R unit test cases + gatorsmile + 2015-12-11 20:55:16 -0800 + Commit: 47461fe, github.com/apache/spark/pull/10160 + + [SPARK-12298][SQL] Fix infinite loop in DataFrame.sortWithinPartitions + Ankur Dave + 2015-12-11 19:07:48 -0800 + Commit: 03d8015, github.com/apache/spark/pull/10271 + + [SPARK-11978][ML] Move dataset_example.py to examples/ml and rename to dataframe_example.py + Yanbo Liang + 2015-12-11 18:02:24 -0800 + Commit: c2f2046, github.com/apache/spark/pull/9957 + + [SPARK-12217][ML] Document invalid handling for StringIndexer + BenFradet + 2015-12-11 15:43:00 -0800 + Commit: 75531c7, github.com/apache/spark/pull/10257 + + [SPARK-11497][MLLIB][PYTHON] PySpark RowMatrix Constructor Has Type Erasure Issue + Mike Dusenberry + 2015-12-11 14:21:33 -0800 + Commit: bfcc8cf, github.com/apache/spark/pull/9458 + + [SPARK-11964][DOCS][ML] Add in Pipeline Import/Export Documentation + anabranch , Bill Chambers + 2015-12-11 12:55:56 -0800 + Commit: 2ddd104, github.com/apache/spark/pull/10179 + + [SPARK-12146][SPARKR] SparkR jsonFile should support multiple input files + Yanbo Liang + 2015-12-11 11:47:35 -0800 + Commit: f05bae4, github.com/apache/spark/pull/10145 + + Preparing development version 1.6.0-SNAPSHOT + Patrick Wendell + 2015-12-11 11:25:09 -0800 + Commit: 2e45231 + + Preparing Spark release v1.6.0-rc2 + Patrick Wendell + 2015-12-11 11:25:03 -0800 + Commit: 23f8dfd + + [SPARK-12258] [SQL] passing null into ScalaUDF (follow-up) + Davies Liu + 2015-12-11 11:15:53 -0800 + Commit: eec3660, github.com/apache/spark/pull/10266 + + Preparing development version 1.6.0-SNAPSHOT + Patrick Wendell + 2015-12-10 18:45:42 -0800 + Commit: 250249e + + Preparing Spark release v1.6.0-rc2 + Patrick Wendell + 2015-12-10 18:45:36 -0800 + Commit: 3e39925 + + [SPARK-12258][SQL] passing null into ScalaUDF + Davies Liu 
+ 2015-12-10 17:22:18 -0800 + Commit: d09af2c, github.com/apache/spark/pull/10249 + + [STREAMING][DOC][MINOR] Update the description of direct Kafka stream doc + jerryshao + 2015-12-10 15:31:46 -0800 + Commit: 5d3722f, github.com/apache/spark/pull/10246 + + [SPARK-12155][SPARK-12253] Fix executor OOM in unified memory management + Andrew Or + 2015-12-10 15:30:08 -0800 + Commit: c247b6a, github.com/apache/spark/pull/10240 + + [SPARK-12251] Document and improve off-heap memory configurations + Josh Rosen + 2015-12-10 15:29:04 -0800 + Commit: 9870e5c, github.com/apache/spark/pull/10237 + + [SPARK-12212][ML][DOC] Clarifies the difference between spark.ml, spark.mllib and mllib in the documentation. + Timothy Hunter + 2015-12-10 12:50:46 -0800 + Commit: d0307de, github.com/apache/spark/pull/10234 + + [SPARK-12250][SQL] Allow users to define a UDAF without providing details of its inputSchema + Yin Huai + 2015-12-10 12:03:29 -0800 + Commit: 594fafc, github.com/apache/spark/pull/10236 + + [SPARK-12012][SQL][BRANCH-1.6] Show more comprehensive PhysicalRDD metadata when visualizing SQL query plan + Cheng Lian + 2015-12-10 10:19:44 -0800 + Commit: e541f70, github.com/apache/spark/pull/10250 + +
spark git commit: [STREAMING][MINOR] Fix typo in function name of StateImpl
Repository: spark Updated Branches: refs/heads/branch-1.6 352a0c80f -> 23c884605 [STREAMING][MINOR] Fix typo in function name of StateImpl cc\ tdas zsxwing , please review. Thanks a lot. Author: jerryshao Closes #10305 from jerryshao/fix-typo-state-impl. (cherry picked from commit bc1ff9f4a41401599d3a87fb3c23a2078228a29b) Signed-off-by: Shixiong Zhu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/23c88460 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/23c88460 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/23c88460 Branch: refs/heads/branch-1.6 Commit: 23c8846050b307fdfe2307f7e7ca9d0f69f969a9 Parents: 352a0c8 Author: jerryshao Authored: Tue Dec 15 09:41:40 2015 -0800 Committer: Shixiong Zhu Committed: Tue Dec 15 09:41:50 2015 -0800 -- streaming/src/main/scala/org/apache/spark/streaming/State.scala| 2 +- .../scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala | 2 +- .../test/scala/org/apache/spark/streaming/MapWithStateSuite.scala | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/23c88460/streaming/src/main/scala/org/apache/spark/streaming/State.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/State.scala b/streaming/src/main/scala/org/apache/spark/streaming/State.scala index b47bdda..42424d6 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/State.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/State.scala @@ -206,7 +206,7 @@ private[streaming] class StateImpl[S] extends State[S] { * Update the internal data and flags in `this` to the given state that is going to be timed out. * This method allows `this` object to be reused across many state records. 
*/ - def wrapTiminoutState(newState: S): Unit = { + def wrapTimingOutState(newState: S): Unit = { this.state = newState defined = true timingOut = true http://git-wip-us.apache.org/repos/asf/spark/blob/23c88460/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala b/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala index ed95171..fdf6167 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala @@ -67,7 +67,7 @@ private[streaming] object MapWithStateRDDRecord { // data returned if (removeTimedoutData && timeoutThresholdTime.isDefined) { newStateMap.getByTime(timeoutThresholdTime.get).foreach { case (key, state, _) => -wrappedState.wrapTiminoutState(state) +wrappedState.wrapTimingOutState(state) val returned = mappingFunction(batchTime, key, None, wrappedState) mappedData ++= returned newStateMap.remove(key) http://git-wip-us.apache.org/repos/asf/spark/blob/23c88460/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala -- diff --git a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala index 4b08085..6b21433 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala @@ -125,7 +125,7 @@ class MapWithStateSuite extends SparkFunSuite state.remove() testState(None, shouldBeRemoved = true) -state.wrapTiminoutState(3) +state.wrapTimingOutState(3) testState(Some(3), shouldBeTimingOut = true) } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [STREAMING][MINOR] Fix typo in function name of StateImpl
Repository: spark Updated Branches: refs/heads/master c59df8c51 -> bc1ff9f4a [STREAMING][MINOR] Fix typo in function name of StateImpl cc\ tdas zsxwing , please review. Thanks a lot. Author: jerryshao Closes #10305 from jerryshao/fix-typo-state-impl. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bc1ff9f4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bc1ff9f4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bc1ff9f4 Branch: refs/heads/master Commit: bc1ff9f4a41401599d3a87fb3c23a2078228a29b Parents: c59df8c Author: jerryshao Authored: Tue Dec 15 09:41:40 2015 -0800 Committer: Shixiong Zhu Committed: Tue Dec 15 09:41:40 2015 -0800 -- streaming/src/main/scala/org/apache/spark/streaming/State.scala| 2 +- .../scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala | 2 +- .../test/scala/org/apache/spark/streaming/MapWithStateSuite.scala | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/bc1ff9f4/streaming/src/main/scala/org/apache/spark/streaming/State.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/State.scala b/streaming/src/main/scala/org/apache/spark/streaming/State.scala index b47bdda..42424d6 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/State.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/State.scala @@ -206,7 +206,7 @@ private[streaming] class StateImpl[S] extends State[S] { * Update the internal data and flags in `this` to the given state that is going to be timed out. * This method allows `this` object to be reused across many state records. */ - def wrapTiminoutState(newState: S): Unit = { + def wrapTimingOutState(newState: S): Unit = { this.state = newState defined = true timingOut = true http://git-wip-us.apache.org/repos/asf/spark/blob/bc1ff9f4/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala b/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala index ed95171..fdf6167 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala @@ -67,7 +67,7 @@ private[streaming] object MapWithStateRDDRecord { // data returned if (removeTimedoutData && timeoutThresholdTime.isDefined) { newStateMap.getByTime(timeoutThresholdTime.get).foreach { case (key, state, _) => -wrappedState.wrapTiminoutState(state) +wrappedState.wrapTimingOutState(state) val returned = mappingFunction(batchTime, key, None, wrappedState) mappedData ++= returned newStateMap.remove(key) http://git-wip-us.apache.org/repos/asf/spark/blob/bc1ff9f4/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala -- diff --git a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala index 4b08085..6b21433 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala @@ -125,7 +125,7 @@ class MapWithStateSuite extends SparkFunSuite state.remove() testState(None, shouldBeRemoved = true) -state.wrapTiminoutState(3) +state.wrapTimingOutState(3) testState(Some(3), shouldBeTimingOut = true) } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For 
additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-12332][TRIVIAL][TEST] Fix minor typo in ResetSystemProperties
Repository: spark Updated Branches: refs/heads/master 606f99b94 -> c59df8c51 [SPARK-12332][TRIVIAL][TEST] Fix minor typo in ResetSystemProperties Fix a minor typo (unbalanced bracket) in ResetSystemProperties. Author: Holden Karau Closes #10303 from holdenk/SPARK-12332-trivial-typo-in-ResetSystemProperties-comment. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c59df8c5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c59df8c5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c59df8c5 Branch: refs/heads/master Commit: c59df8c51609a0d6561ae1868e7970b516fb1811 Parents: 606f99b Author: Holden Karau Authored: Tue Dec 15 11:38:57 2015 + Committer: Sean Owen Committed: Tue Dec 15 11:38:57 2015 + -- .../test/scala/org/apache/spark/util/ResetSystemProperties.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c59df8c5/core/src/test/scala/org/apache/spark/util/ResetSystemProperties.scala -- diff --git a/core/src/test/scala/org/apache/spark/util/ResetSystemProperties.scala b/core/src/test/scala/org/apache/spark/util/ResetSystemProperties.scala index c58db5e..60fb7ab 100644 --- a/core/src/test/scala/org/apache/spark/util/ResetSystemProperties.scala +++ b/core/src/test/scala/org/apache/spark/util/ResetSystemProperties.scala @@ -45,7 +45,7 @@ private[spark] trait ResetSystemProperties extends BeforeAndAfterEach { this: Su var oldProperties: Properties = null override def beforeEach(): Unit = { -// we need SerializationUtils.clone instead of `new Properties(System.getProperties()` because +// we need SerializationUtils.clone instead of `new Properties(System.getProperties())` because // the later way of creating a copy does not copy the properties but it initializes a new // Properties object with the given properties as defaults. They are not recognized at all // by standard Scala wrapper over Java Properties then. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
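The comment being fixed above makes a point about java.util.Properties that is easy to miss: new Properties(other) installs other only as defaults, copies nothing, and defaults are invisible to entry-based iteration (including the standard Scala wrapper), which is why the test helper clones System.getProperties instead. A small self-contained check of that behaviour:

```scala
import java.util.Properties
import scala.collection.JavaConverters._

object PropertiesDefaultsSketch extends App {
  val original = new Properties()
  original.setProperty("spark.test.key", "value")

  // Constructor argument becomes *defaults* only: lookups fall through, but no entry is copied.
  val viaConstructor = new Properties(original)
  assert(viaConstructor.getProperty("spark.test.key") == "value")
  assert(!viaConstructor.containsKey("spark.test.key"))
  assert(viaConstructor.asScala.isEmpty)          // entry-based views see nothing

  // A genuine copy owns its entries (SerializationUtils.clone achieves the same end).
  val realCopy = new Properties()
  for (k <- original.stringPropertyNames().asScala) {
    realCopy.setProperty(k, original.getProperty(k))
  }
  assert(realCopy.containsKey("spark.test.key"))
}
```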