spark git commit: [SPARK-6408] [SQL] Fix JDBCRDD filtering string literals
Repository: spark Updated Branches: refs/heads/master b6090f902 - 9b1e1f20d [SPARK-6408] [SQL] Fix JDBCRDD filtering string literals Author: ypcat ypc...@gmail.com Author: Pei-Lun Lee pl...@appier.com Closes #5087 from ypcat/spark-6408 and squashes the following commits: 1becc16 [ypcat] [SPARK-6408] [SQL] styling 1bc4455 [ypcat] [SPARK-6408] [SQL] move nested function outside e57fa4a [ypcat] [SPARK-6408] [SQL] fix test case 245ab6f [ypcat] [SPARK-6408] [SQL] add test cases for filtering quoted strings 8962534 [Pei-Lun Lee] [SPARK-6408] [SQL] Fix filtering string literals Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9b1e1f20 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9b1e1f20 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9b1e1f20 Branch: refs/heads/master Commit: 9b1e1f20d4498bda72dd53a832110883a7ca41b5 Parents: b6090f9 Author: ypcat ypc...@gmail.com Authored: Sun Mar 22 15:49:13 2015 +0800 Committer: Cheng Lian l...@databricks.com Committed: Sun Mar 22 15:49:13 2015 +0800 -- .../org/apache/spark/sql/jdbc/JDBCRDD.scala | 19 ++- .../org/apache/spark/sql/jdbc/JDBCSuite.scala| 12 ++-- 2 files changed, 24 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9b1e1f20/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala index 3266b97..76f8593 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.jdbc import java.sql.{Connection, DriverManager, ResultSet, ResultSetMetaData, SQLException} +import org.apache.commons.lang.StringEscapeUtils.escapeSql import org.apache.spark.{Logging, Partition, SparkContext, TaskContext} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions.{Row, SpecificMutableRow} @@ -227,15 +228,23 @@ private[sql] class JDBCRDD( } /** + * Converts value to SQL expression. + */ + private def compileValue(value: Any): Any = value match { +case stringValue: String = s'${escapeSql(stringValue)}' +case _ = value + } + + /** * Turns a single Filter into a String representing a SQL expression. * Returns null for an unhandled filter. 
 */
  private def compileFilter(f: Filter): String = f match {
-    case EqualTo(attr, value) => s"$attr = $value"
-    case LessThan(attr, value) => s"$attr < $value"
-    case GreaterThan(attr, value) => s"$attr > $value"
-    case LessThanOrEqual(attr, value) => s"$attr <= $value"
-    case GreaterThanOrEqual(attr, value) => s"$attr >= $value"
+    case EqualTo(attr, value) => s"$attr = ${compileValue(value)}"
+    case LessThan(attr, value) => s"$attr < ${compileValue(value)}"
+    case GreaterThan(attr, value) => s"$attr > ${compileValue(value)}"
+    case LessThanOrEqual(attr, value) => s"$attr <= ${compileValue(value)}"
+    case GreaterThanOrEqual(attr, value) => s"$attr >= ${compileValue(value)}"
     case _ => null
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/9b1e1f20/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index cd737c0..5eb6ab2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -24,6 +24,7 @@ import java.util.{Calendar, GregorianCalendar}
 import org.apache.spark.sql.test._
 import org.scalatest.{FunSuite, BeforeAndAfter}
 import TestSQLContext._
+import TestSQLContext.implicits._

 class JDBCSuite extends FunSuite with BeforeAndAfter {
   val url = "jdbc:h2:mem:testdb0"
@@ -38,7 +39,7 @@ class JDBCSuite extends FunSuite with BeforeAndAfter {
     conn.prepareStatement("create table test.people (name TEXT(32) NOT NULL, theid INTEGER NOT NULL)").executeUpdate()
     conn.prepareStatement("insert into test.people values ('fred', 1)").executeUpdate()
     conn.prepareStatement("insert into test.people values ('mary', 2)").executeUpdate()
-    conn.prepareStatement("insert into test.people values ('joe', 3)").executeUpdate()
+    conn.prepareStatement("insert into test.people values ('joe ''foo'' \"bar\"', 3)").executeUpdate()
     conn.commit()
     sql(
@@ -129,13 +130,20 @@ class JDBCSuite extends FunSuite with BeforeAndAfter {
     assert(sql("SELECT * FROM foobar WHERE THEID < 1").collect().size == 0)
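The effect of the patch is easiest to see on a concrete filter value. The snippet below is a minimal, self-contained sketch that mirrors `compileValue` and the `EqualTo` branch of `compileFilter` (it is not the JDBCRDD code itself); the only external call, `escapeSql`, is the commons-lang helper the patch imports.

```scala
import org.apache.commons.lang.StringEscapeUtils.escapeSql

object FilterCompilationSketch {
  // Quote and escape string values; pass everything else through unchanged.
  private def compileValue(value: Any): Any = value match {
    case stringValue: String => s"'${escapeSql(stringValue)}'"
    case _ => value
  }

  // Simplified stand-in for the EqualTo branch of compileFilter.
  def compileEqualTo(attr: String, value: Any): String =
    s"$attr = ${compileValue(value)}"

  def main(args: Array[String]): Unit = {
    // Before the patch this produced: NAME = joe 'foo'   (invalid SQL)
    // After the patch it produces:    NAME = 'joe ''foo''' (a proper literal)
    println(compileEqualTo("NAME", "joe 'foo'"))
    println(compileEqualTo("THEID", 3)) // numeric values are left untouched
  }
}
```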
spark git commit: [SPARK-6408] [SQL] Fix JDBCRDD filtering string literals
Repository: spark Updated Branches: refs/heads/branch-1.3 0021d2260 - e60fbf6c4 [SPARK-6408] [SQL] Fix JDBCRDD filtering string literals Author: ypcat ypc...@gmail.com Author: Pei-Lun Lee pl...@appier.com Closes #5087 from ypcat/spark-6408 and squashes the following commits: 1becc16 [ypcat] [SPARK-6408] [SQL] styling 1bc4455 [ypcat] [SPARK-6408] [SQL] move nested function outside e57fa4a [ypcat] [SPARK-6408] [SQL] fix test case 245ab6f [ypcat] [SPARK-6408] [SQL] add test cases for filtering quoted strings 8962534 [Pei-Lun Lee] [SPARK-6408] [SQL] Fix filtering string literals (cherry picked from commit 9b1e1f20d4498bda72dd53a832110883a7ca41b5) Signed-off-by: Cheng Lian l...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e60fbf6c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e60fbf6c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e60fbf6c Branch: refs/heads/branch-1.3 Commit: e60fbf6c4597d0240ab1b472594774beb4f3d391 Parents: 0021d22 Author: ypcat ypc...@gmail.com Authored: Sun Mar 22 15:49:13 2015 +0800 Committer: Cheng Lian l...@databricks.com Committed: Sun Mar 22 15:49:38 2015 +0800 -- .../org/apache/spark/sql/jdbc/JDBCRDD.scala | 19 ++- .../org/apache/spark/sql/jdbc/JDBCSuite.scala| 12 ++-- 2 files changed, 24 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e60fbf6c/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala index 3266b97..76f8593 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.jdbc import java.sql.{Connection, DriverManager, ResultSet, ResultSetMetaData, SQLException} +import org.apache.commons.lang.StringEscapeUtils.escapeSql import org.apache.spark.{Logging, Partition, SparkContext, TaskContext} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions.{Row, SpecificMutableRow} @@ -227,15 +228,23 @@ private[sql] class JDBCRDD( } /** + * Converts value to SQL expression. + */ + private def compileValue(value: Any): Any = value match { +case stringValue: String = s'${escapeSql(stringValue)}' +case _ = value + } + + /** * Turns a single Filter into a String representing a SQL expression. * Returns null for an unhandled filter. 
 */
  private def compileFilter(f: Filter): String = f match {
-    case EqualTo(attr, value) => s"$attr = $value"
-    case LessThan(attr, value) => s"$attr < $value"
-    case GreaterThan(attr, value) => s"$attr > $value"
-    case LessThanOrEqual(attr, value) => s"$attr <= $value"
-    case GreaterThanOrEqual(attr, value) => s"$attr >= $value"
+    case EqualTo(attr, value) => s"$attr = ${compileValue(value)}"
+    case LessThan(attr, value) => s"$attr < ${compileValue(value)}"
+    case GreaterThan(attr, value) => s"$attr > ${compileValue(value)}"
+    case LessThanOrEqual(attr, value) => s"$attr <= ${compileValue(value)}"
+    case GreaterThanOrEqual(attr, value) => s"$attr >= ${compileValue(value)}"
     case _ => null
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/e60fbf6c/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index cd737c0..5eb6ab2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -24,6 +24,7 @@ import java.util.{Calendar, GregorianCalendar}
 import org.apache.spark.sql.test._
 import org.scalatest.{FunSuite, BeforeAndAfter}
 import TestSQLContext._
+import TestSQLContext.implicits._

 class JDBCSuite extends FunSuite with BeforeAndAfter {
   val url = "jdbc:h2:mem:testdb0"
@@ -38,7 +39,7 @@ class JDBCSuite extends FunSuite with BeforeAndAfter {
     conn.prepareStatement("create table test.people (name TEXT(32) NOT NULL, theid INTEGER NOT NULL)").executeUpdate()
     conn.prepareStatement("insert into test.people values ('fred', 1)").executeUpdate()
     conn.prepareStatement("insert into test.people values ('mary', 2)").executeUpdate()
-    conn.prepareStatement("insert into test.people values ('joe', 3)").executeUpdate()
+    conn.prepareStatement("insert into test.people values ('joe ''foo'' \"bar\"', 3)").executeUpdate()
     conn.commit()
     sql(
@@ -129,13 +130,20 @@ class JDBCSuite extends
[2/2] spark git commit: [SPARK-6132] ContextCleaner race condition across SparkContexts
[SPARK-6132] ContextCleaner race condition across SparkContexts The problem is that `ContextCleaner` may clean variables that belong to a different `SparkContext`. This can happen if the `SparkContext` to which the cleaner belongs stops, and a new one is started immediately afterwards in the same JVM. In this case, if the cleaner is in the middle of cleaning a broadcast, for instance, it will do so through `SparkEnv.get.blockManager`, which could be one that belongs to a different `SparkContext`. JoshRosen and I suspect that this is the cause of many flaky tests, most notably the `JavaAPISuite`. We were able to reproduce the failure locally (though it is not deterministic and very hard to reproduce). Author: Andrew Or and...@databricks.com Closes #4869 from andrewor14/cleaner-masquerade and squashes the following commits: 29168c0 [Andrew Or] Synchronize ContextCleaner stop Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/06d883c3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/06d883c3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/06d883c3 Branch: refs/heads/branch-1.2 Commit: 06d883c3735986c88585565d4e66a5231431a4b8 Parents: a2a94a1 Author: Andrew Or and...@databricks.com Authored: Tue Mar 3 13:44:05 2015 -0800 Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 13:05:11 2015 + -- .../scala/org/apache/spark/ContextCleaner.scala | 35 ++-- 1 file changed, 24 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/06d883c3/core/src/main/scala/org/apache/spark/ContextCleaner.scala -- diff --git a/core/src/main/scala/org/apache/spark/ContextCleaner.scala b/core/src/main/scala/org/apache/spark/ContextCleaner.scala index ede1e23..201e5ec 100644 --- a/core/src/main/scala/org/apache/spark/ContextCleaner.scala +++ b/core/src/main/scala/org/apache/spark/ContextCleaner.scala @@ -104,9 +104,19 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { cleaningThread.start() } - /** Stop the cleaner. */ + /** + * Stop the cleaning thread and wait until the thread has finished running its current task. + */ def stop() { stopped = true +// Interrupt the cleaning thread, but wait until the current task has finished before +// doing so. This guards against the race condition where a cleaning thread may +// potentially clean similarly named variables created by a different SparkContext, +// resulting in otherwise inexplicable block-not-found exceptions (SPARK-6132). +synchronized { + cleaningThread.interrupt() +} +cleaningThread.join() } /** Register a RDD for cleanup when it is garbage collected. 
*/ @@ -135,16 +145,19 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { try { val reference = Option(referenceQueue.remove(ContextCleaner.REF_QUEUE_POLL_TIMEOUT)) .map(_.asInstanceOf[CleanupTaskWeakReference]) -reference.map(_.task).foreach { task = - logDebug(Got cleaning task + task) - referenceBuffer -= reference.get - task match { -case CleanRDD(rddId) = - doCleanupRDD(rddId, blocking = blockOnCleanupTasks) -case CleanShuffle(shuffleId) = - doCleanupShuffle(shuffleId, blocking = blockOnShuffleCleanupTasks) -case CleanBroadcast(broadcastId) = - doCleanupBroadcast(broadcastId, blocking = blockOnCleanupTasks) +// Synchronize here to avoid being interrupted on stop() +synchronized { + reference.map(_.task).foreach { task = +logDebug(Got cleaning task + task) +referenceBuffer -= reference.get +task match { + case CleanRDD(rddId) = +doCleanupRDD(rddId, blocking = blockOnCleanupTasks) + case CleanShuffle(shuffleId) = +doCleanupShuffle(shuffleId, blocking = blockOnShuffleCleanupTasks) + case CleanBroadcast(broadcastId) = +doCleanupBroadcast(broadcastId, blocking = blockOnCleanupTasks) +} } } } catch { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
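The synchronization the patch adds is a general stop-a-polling-thread pattern, shown below as a condensed, runnable sketch rather than Spark's actual `ContextCleaner`: the worker holds a lock while it processes a task, and `stop()` takes the same lock before interrupting, so an in-flight cleanup always completes against the context that created it.

```scala
import java.util.concurrent.{LinkedBlockingQueue, TimeUnit}

// Minimal sketch of the locking pattern (illustrative names, not Spark's code).
class CleanerSketch {
  private val queue = new LinkedBlockingQueue[String]()
  @volatile private var stopped = false

  private val cleaningThread = new Thread("cleaner-sketch") {
    override def run(): Unit = {
      while (!stopped) {
        try {
          val task = Option(queue.poll(100, TimeUnit.MILLISECONDS))
          // Synchronize here to avoid being interrupted in the middle of a task.
          CleanerSketch.this.synchronized {
            task.foreach(t => println(s"cleaning $t"))
          }
        } catch {
          // Expected when stop() interrupts the blocking poll; see the companion hotfix commit.
          case _: InterruptedException if stopped =>
        }
      }
    }
  }

  def start(): Unit = cleaningThread.start()
  def submit(task: String): Unit = queue.put(task)

  def stop(): Unit = {
    stopped = true
    // Take the lock first: an in-flight cleanup finishes before the interrupt lands.
    synchronized {
      cleaningThread.interrupt()
    }
    cleaningThread.join()
  }
}
```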
[1/2] spark git commit: [SPARK-6132][HOTFIX] ContextCleaner InterruptedException should be quiet
Repository: spark Updated Branches: refs/heads/branch-1.2 a2a94a154 - abdcec673 [SPARK-6132][HOTFIX] ContextCleaner InterruptedException should be quiet If the cleaner is stopped, we shouldn't print a huge stack trace when the cleaner thread is interrupted because we purposefully did this. Author: Andrew Or and...@databricks.com Closes #4882 from andrewor14/cleaner-interrupt and squashes the following commits: 8652120 [Andrew Or] Just a hot fix Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/abdcec67 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/abdcec67 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/abdcec67 Branch: refs/heads/branch-1.2 Commit: abdcec673f722151cfde59111cf38a0842cc11ba Parents: 06d883c Author: Andrew Or and...@databricks.com Authored: Tue Mar 3 20:49:45 2015 -0800 Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 13:05:11 2015 + -- core/src/main/scala/org/apache/spark/ContextCleaner.scala | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/abdcec67/core/src/main/scala/org/apache/spark/ContextCleaner.scala -- diff --git a/core/src/main/scala/org/apache/spark/ContextCleaner.scala b/core/src/main/scala/org/apache/spark/ContextCleaner.scala index 201e5ec..98e4401 100644 --- a/core/src/main/scala/org/apache/spark/ContextCleaner.scala +++ b/core/src/main/scala/org/apache/spark/ContextCleaner.scala @@ -161,6 +161,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { } } } catch { +case ie: InterruptedException if stopped = // ignore case e: Exception = logError(Error in cleaning thread, e) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
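In isolation, the guard added here behaves like the sketch below (illustrative names, not the `ContextCleaner` code): an interrupt that arrives after shutdown was requested is expected and swallowed, while any other failure still surfaces with its stack trace.

```scala
object QuietShutdownSketch {
  @volatile private var stopped = false

  def main(args: Array[String]): Unit = {
    val worker = new Thread {
      override def run(): Unit = {
        try {
          Thread.sleep(60000) // stands in for the blocking reference-queue poll
        } catch {
          // Interrupt after stop was requested: expected, so stay quiet.
          case _: InterruptedException if stopped =>
          // Anything else is still a real error and keeps its stack trace.
          case e: Exception => e.printStackTrace()
        }
      }
    }
    worker.start()
    stopped = true     // request shutdown first...
    worker.interrupt() // ...then interrupt, so the guard above matches
    worker.join()
  }
}
```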
spark git commit: [SPARK-6448] Make history server log parse exceptions
Repository: spark Updated Branches: refs/heads/master 9b1e1f20d - b9fe504b4 [SPARK-6448] Make history server log parse exceptions This helped me to debug a parse error that was due to the event log format changing recently. Author: Ryan Williams ryan.blake.willi...@gmail.com Closes #5122 from ryan-williams/histerror and squashes the following commits: 5831656 [Ryan Williams] line length c3742ae [Ryan Williams] Make history server log parse exceptions Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b9fe504b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b9fe504b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b9fe504b Branch: refs/heads/master Commit: b9fe504b497cfa509310b4045de4873739c76667 Parents: 9b1e1f2 Author: Ryan Williams ryan.blake.willi...@gmail.com Authored: Sun Mar 22 11:54:23 2015 + Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 11:54:23 2015 + -- .../scala/org/apache/spark/deploy/history/FsHistoryProvider.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b9fe504b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 7fde020..db7c499 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -233,7 +233,8 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis } catch { case e: Exception = logError( -sException encountered when attempting to load application log ${fileStatus.getPath}) +sException encountered when attempting to load application log ${fileStatus.getPath}, +e) None } }.toSeq.sortWith(compareAppInfo) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
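The change is small but the distinction matters for debugging: logging only a message drops the stack trace of the parse failure, while passing the `Throwable` preserves it. A self-contained sketch of the before/after behaviour, using a hypothetical event-log path and local `logError` stand-ins:

```scala
import java.io.FileNotFoundException

object LogParseErrorSketch {
  def logError(msg: String): Unit = System.err.println(s"ERROR $msg")
  def logError(msg: String, e: Throwable): Unit = {
    System.err.println(s"ERROR $msg")
    e.printStackTrace() // the cause is now visible in the history server log
  }

  def main(args: Array[String]): Unit = {
    val path = "hdfs://host/eventlogs/app-1234" // hypothetical log path
    try {
      throw new FileNotFoundException(path)     // stands in for a failed replay/parse
    } catch {
      case e: Exception =>
        // Before: logError(s"Exception encountered when attempting to load application log $path")
        // After: the exception is passed along as well.
        logError(s"Exception encountered when attempting to load application log $path", e)
    }
  }
}
```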
[1/2] spark git commit: [SPARK-6132] ContextCleaner race condition across SparkContexts
Repository: spark Updated Branches: refs/heads/branch-1.1 c5836816f - 39761f515 [SPARK-6132] ContextCleaner race condition across SparkContexts The problem is that `ContextCleaner` may clean variables that belong to a different `SparkContext`. This can happen if the `SparkContext` to which the cleaner belongs stops, and a new one is started immediately afterwards in the same JVM. In this case, if the cleaner is in the middle of cleaning a broadcast, for instance, it will do so through `SparkEnv.get.blockManager`, which could be one that belongs to a different `SparkContext`. JoshRosen and I suspect that this is the cause of many flaky tests, most notably the `JavaAPISuite`. We were able to reproduce the failure locally (though it is not deterministic and very hard to reproduce). Author: Andrew Or and...@databricks.com Closes #4869 from andrewor14/cleaner-masquerade and squashes the following commits: 29168c0 [Andrew Or] Synchronize ContextCleaner stop Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e445ce61 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e445ce61 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e445ce61 Branch: refs/heads/branch-1.1 Commit: e445ce61d02778beaa30a2d1867e5b06fe09fb5d Parents: c583681 Author: Andrew Or and...@databricks.com Authored: Tue Mar 3 13:44:05 2015 -0800 Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 13:08:14 2015 + -- .../scala/org/apache/spark/ContextCleaner.scala | 35 ++-- 1 file changed, 24 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e445ce61/core/src/main/scala/org/apache/spark/ContextCleaner.scala -- diff --git a/core/src/main/scala/org/apache/spark/ContextCleaner.scala b/core/src/main/scala/org/apache/spark/ContextCleaner.scala index ede1e23..201e5ec 100644 --- a/core/src/main/scala/org/apache/spark/ContextCleaner.scala +++ b/core/src/main/scala/org/apache/spark/ContextCleaner.scala @@ -104,9 +104,19 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { cleaningThread.start() } - /** Stop the cleaner. */ + /** + * Stop the cleaning thread and wait until the thread has finished running its current task. + */ def stop() { stopped = true +// Interrupt the cleaning thread, but wait until the current task has finished before +// doing so. This guards against the race condition where a cleaning thread may +// potentially clean similarly named variables created by a different SparkContext, +// resulting in otherwise inexplicable block-not-found exceptions (SPARK-6132). +synchronized { + cleaningThread.interrupt() +} +cleaningThread.join() } /** Register a RDD for cleanup when it is garbage collected. 
*/ @@ -135,16 +145,19 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { try { val reference = Option(referenceQueue.remove(ContextCleaner.REF_QUEUE_POLL_TIMEOUT)) .map(_.asInstanceOf[CleanupTaskWeakReference]) -reference.map(_.task).foreach { task = - logDebug(Got cleaning task + task) - referenceBuffer -= reference.get - task match { -case CleanRDD(rddId) = - doCleanupRDD(rddId, blocking = blockOnCleanupTasks) -case CleanShuffle(shuffleId) = - doCleanupShuffle(shuffleId, blocking = blockOnShuffleCleanupTasks) -case CleanBroadcast(broadcastId) = - doCleanupBroadcast(broadcastId, blocking = blockOnCleanupTasks) +// Synchronize here to avoid being interrupted on stop() +synchronized { + reference.map(_.task).foreach { task = +logDebug(Got cleaning task + task) +referenceBuffer -= reference.get +task match { + case CleanRDD(rddId) = +doCleanupRDD(rddId, blocking = blockOnCleanupTasks) + case CleanShuffle(shuffleId) = +doCleanupShuffle(shuffleId, blocking = blockOnShuffleCleanupTasks) + case CleanBroadcast(broadcastId) = +doCleanupBroadcast(broadcastId, blocking = blockOnCleanupTasks) +} } } } catch { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[2/2] spark git commit: [SPARK-6132][HOTFIX] ContextCleaner InterruptedException should be quiet
[SPARK-6132][HOTFIX] ContextCleaner InterruptedException should be quiet If the cleaner is stopped, we shouldn't print a huge stack trace when the cleaner thread is interrupted because we purposefully did this. Author: Andrew Or and...@databricks.com Closes #4882 from andrewor14/cleaner-interrupt and squashes the following commits: 8652120 [Andrew Or] Just a hot fix Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/39761f51 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/39761f51 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/39761f51 Branch: refs/heads/branch-1.1 Commit: 39761f515d65afff377873ee4701b9313c317a60 Parents: e445ce6 Author: Andrew Or and...@databricks.com Authored: Tue Mar 3 20:49:45 2015 -0800 Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 13:08:34 2015 + -- core/src/main/scala/org/apache/spark/ContextCleaner.scala | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/39761f51/core/src/main/scala/org/apache/spark/ContextCleaner.scala -- diff --git a/core/src/main/scala/org/apache/spark/ContextCleaner.scala b/core/src/main/scala/org/apache/spark/ContextCleaner.scala index 201e5ec..98e4401 100644 --- a/core/src/main/scala/org/apache/spark/ContextCleaner.scala +++ b/core/src/main/scala/org/apache/spark/ContextCleaner.scala @@ -161,6 +161,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { } } } catch { +case ie: InterruptedException if stopped = // ignore case e: Exception = logError(Error in cleaning thread, e) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: SPARK-6454 [DOCS] Fix links to pyspark api
Repository: spark Updated Branches: refs/heads/branch-1.3 e60fbf6c4 - 3ba295fa0 SPARK-6454 [DOCS] Fix links to pyspark api Author: Kamil Smuga smugaka...@gmail.com Author: stderr smugaka...@gmail.com Closes #5120 from kamilsmuga/master and squashes the following commits: fee3281 [Kamil Smuga] more python api links fixed for docs 13240cb [Kamil Smuga] resolved merge conflicts with upstream/master 6649b3b [Kamil Smuga] fix broken docs links to Python API 92f03d7 [stderr] Fix links to pyspark api (cherry picked from commit 6ef48632fbf3e6659ceacaab1dbb8be8238d4d33) Signed-off-by: Sean Owen so...@cloudera.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3ba295fa Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3ba295fa Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3ba295fa Branch: refs/heads/branch-1.3 Commit: 3ba295fa08d7e57f507319bae621ef7d788a0d23 Parents: e60fbf6 Author: Kamil Smuga smugaka...@gmail.com Authored: Sun Mar 22 15:56:25 2015 + Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 15:57:15 2015 + -- docs/mllib-data-types.md | 8 docs/mllib-naive-bayes.md | 6 +++--- docs/mllib-statistics.md | 10 +- docs/programming-guide.md | 12 ++-- docs/sql-programming-guide.md | 2 +- 5 files changed, 19 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3ba295fa/docs/mllib-data-types.md -- diff --git a/docs/mllib-data-types.md b/docs/mllib-data-types.md index fe6c1bf..4f2a2f7 100644 --- a/docs/mllib-data-types.md +++ b/docs/mllib-data-types.md @@ -78,13 +78,13 @@ MLlib recognizes the following types as dense vectors: and the following as sparse vectors: -* MLlib's [`SparseVector`](api/python/pyspark.mllib.linalg.SparseVector-class.html). +* MLlib's [`SparseVector`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.SparseVector). * SciPy's [`csc_matrix`](http://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csc_matrix.html#scipy.sparse.csc_matrix) with a single column We recommend using NumPy arrays over lists for efficiency, and using the factory methods implemented -in [`Vectors`](api/python/pyspark.mllib.linalg.Vectors-class.html) to create sparse vectors. +in [`Vectors`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Vector) to create sparse vectors. {% highlight python %} import numpy as np @@ -151,7 +151,7 @@ LabeledPoint neg = new LabeledPoint(1.0, Vectors.sparse(3, new int[] {0, 2}, new div data-lang=python markdown=1 A labeled point is represented by -[`LabeledPoint`](api/python/pyspark.mllib.regression.LabeledPoint-class.html). +[`LabeledPoint`](api/python/pyspark.mllib.html#pyspark.mllib.regression.LabeledPoint). {% highlight python %} from pyspark.mllib.linalg import SparseVector @@ -211,7 +211,7 @@ JavaRDDLabeledPoint examples = /div div data-lang=python markdown=1 -[`MLUtils.loadLibSVMFile`](api/python/pyspark.mllib.util.MLUtils-class.html) reads training +[`MLUtils.loadLibSVMFile`](api/python/pyspark.mllib.html#pyspark.mllib.util.MLUtils) reads training examples stored in LIBSVM format. 
{% highlight python %} http://git-wip-us.apache.org/repos/asf/spark/blob/3ba295fa/docs/mllib-naive-bayes.md -- diff --git a/docs/mllib-naive-bayes.md b/docs/mllib-naive-bayes.md index 55b8f2c..a83472f 100644 --- a/docs/mllib-naive-bayes.md +++ b/docs/mllib-naive-bayes.md @@ -106,11 +106,11 @@ NaiveBayesModel sameModel = NaiveBayesModel.load(sc.sc(), myModelPath); div data-lang=python markdown=1 -[NaiveBayes](api/python/pyspark.mllib.classification.NaiveBayes-class.html) implements multinomial +[NaiveBayes](api/python/pyspark.mllib.html#pyspark.mllib.classification.NaiveBayes) implements multinomial naive Bayes. It takes an RDD of -[LabeledPoint](api/python/pyspark.mllib.regression.LabeledPoint-class.html) and an optionally +[LabeledPoint](api/python/pyspark.mllib.html#pyspark.mllib.regression.LabeledPoint) and an optionally smoothing parameter `lambda` as input, and output a -[NaiveBayesModel](api/python/pyspark.mllib.classification.NaiveBayesModel-class.html), which can be +[NaiveBayesModel](api/python/pyspark.mllib.html#pyspark.mllib.classification.NaiveBayesModel), which can be used for evaluation and prediction. Note that the Python API does not yet support model save/load but will in the future. http://git-wip-us.apache.org/repos/asf/spark/blob/3ba295fa/docs/mllib-statistics.md -- diff --git a/docs/mllib-statistics.md b/docs/mllib-statistics.md index ca8c292..887eae7 100644 ---
spark git commit: SPARK-6454 [DOCS] Fix links to pyspark api
Repository: spark Updated Branches: refs/heads/master adb2ff752 - 6ef48632f SPARK-6454 [DOCS] Fix links to pyspark api Author: Kamil Smuga smugaka...@gmail.com Author: stderr smugaka...@gmail.com Closes #5120 from kamilsmuga/master and squashes the following commits: fee3281 [Kamil Smuga] more python api links fixed for docs 13240cb [Kamil Smuga] resolved merge conflicts with upstream/master 6649b3b [Kamil Smuga] fix broken docs links to Python API 92f03d7 [stderr] Fix links to pyspark api Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6ef48632 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6ef48632 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6ef48632 Branch: refs/heads/master Commit: 6ef48632fbf3e6659ceacaab1dbb8be8238d4d33 Parents: adb2ff7 Author: Kamil Smuga smugaka...@gmail.com Authored: Sun Mar 22 15:56:25 2015 + Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 15:56:25 2015 + -- docs/mllib-data-types.md | 8 docs/mllib-naive-bayes.md | 6 +++--- docs/mllib-statistics.md | 10 +- docs/programming-guide.md | 12 ++-- docs/sql-programming-guide.md | 2 +- 5 files changed, 19 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6ef48632/docs/mllib-data-types.md -- diff --git a/docs/mllib-data-types.md b/docs/mllib-data-types.md index fe6c1bf..4f2a2f7 100644 --- a/docs/mllib-data-types.md +++ b/docs/mllib-data-types.md @@ -78,13 +78,13 @@ MLlib recognizes the following types as dense vectors: and the following as sparse vectors: -* MLlib's [`SparseVector`](api/python/pyspark.mllib.linalg.SparseVector-class.html). +* MLlib's [`SparseVector`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.SparseVector). * SciPy's [`csc_matrix`](http://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csc_matrix.html#scipy.sparse.csc_matrix) with a single column We recommend using NumPy arrays over lists for efficiency, and using the factory methods implemented -in [`Vectors`](api/python/pyspark.mllib.linalg.Vectors-class.html) to create sparse vectors. +in [`Vectors`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Vector) to create sparse vectors. {% highlight python %} import numpy as np @@ -151,7 +151,7 @@ LabeledPoint neg = new LabeledPoint(1.0, Vectors.sparse(3, new int[] {0, 2}, new div data-lang=python markdown=1 A labeled point is represented by -[`LabeledPoint`](api/python/pyspark.mllib.regression.LabeledPoint-class.html). +[`LabeledPoint`](api/python/pyspark.mllib.html#pyspark.mllib.regression.LabeledPoint). {% highlight python %} from pyspark.mllib.linalg import SparseVector @@ -211,7 +211,7 @@ JavaRDDLabeledPoint examples = /div div data-lang=python markdown=1 -[`MLUtils.loadLibSVMFile`](api/python/pyspark.mllib.util.MLUtils-class.html) reads training +[`MLUtils.loadLibSVMFile`](api/python/pyspark.mllib.html#pyspark.mllib.util.MLUtils) reads training examples stored in LIBSVM format. 
{% highlight python %} http://git-wip-us.apache.org/repos/asf/spark/blob/6ef48632/docs/mllib-naive-bayes.md -- diff --git a/docs/mllib-naive-bayes.md b/docs/mllib-naive-bayes.md index 55b8f2c..a83472f 100644 --- a/docs/mllib-naive-bayes.md +++ b/docs/mllib-naive-bayes.md @@ -106,11 +106,11 @@ NaiveBayesModel sameModel = NaiveBayesModel.load(sc.sc(), myModelPath); div data-lang=python markdown=1 -[NaiveBayes](api/python/pyspark.mllib.classification.NaiveBayes-class.html) implements multinomial +[NaiveBayes](api/python/pyspark.mllib.html#pyspark.mllib.classification.NaiveBayes) implements multinomial naive Bayes. It takes an RDD of -[LabeledPoint](api/python/pyspark.mllib.regression.LabeledPoint-class.html) and an optionally +[LabeledPoint](api/python/pyspark.mllib.html#pyspark.mllib.regression.LabeledPoint) and an optionally smoothing parameter `lambda` as input, and output a -[NaiveBayesModel](api/python/pyspark.mllib.classification.NaiveBayesModel-class.html), which can be +[NaiveBayesModel](api/python/pyspark.mllib.html#pyspark.mllib.classification.NaiveBayesModel), which can be used for evaluation and prediction. Note that the Python API does not yet support model save/load but will in the future. http://git-wip-us.apache.org/repos/asf/spark/blob/6ef48632/docs/mllib-statistics.md -- diff --git a/docs/mllib-statistics.md b/docs/mllib-statistics.md index ca8c292..887eae7 100644 --- a/docs/mllib-statistics.md +++ b/docs/mllib-statistics.md @@ -81,8 +81,8 @@ System.out.println(summary.numNonzeros()); // number of
spark git commit: [SPARK-6455] [docs] Correct some mistakes and typos
Repository: spark Updated Branches: refs/heads/master b9fe504b4 - ab4f516fb [SPARK-6455] [docs] Correct some mistakes and typos Correct some typos. Correct a mistake in lib/PageRank.scala. The first PageRank implementation uses standalone Graph interface, but the second uses Pregel interface. It may mislead the code viewers. Author: Hangchen Yu y...@gitcafe.com Closes #5128 from yuhc/master and squashes the following commits: 53e5432 [Hangchen Yu] Merge branch 'master' of https://github.com/yuhc/spark 67b77b5 [Hangchen Yu] [SPARK-6455] [docs] Correct some mistakes and typos 206f2dc [Hangchen Yu] Correct some mistakes and typos. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ab4f516f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ab4f516f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ab4f516f Branch: refs/heads/master Commit: ab4f516fbe63e24e076c68f4933a171a72b6f1fd Parents: b9fe504 Author: Hangchen Yu y...@gitcafe.com Authored: Sun Mar 22 15:51:10 2015 + Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 15:51:10 2015 + -- graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala | 4 ++-- graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala | 4 ++-- graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ab4f516f/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala -- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala index dc8b478..86f611d 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala @@ -113,7 +113,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali * Collect the neighbor vertex attributes for each vertex. * * @note This function could be highly inefficient on power-law - * graphs where high degree vertices may force a large ammount of + * graphs where high degree vertices may force a large amount of * information to be collected to a single location. * * @param edgeDirection the direction along which to collect @@ -187,7 +187,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali /** * Join the vertices with an RDD and then apply a function from the - * the vertex and RDD entry to a new vertex value. The input table + * vertex and RDD entry to a new vertex value. The input table * should contain at most one entry for each vertex. If no entry is * provided the map function is skipped and the old value is used. * http://git-wip-us.apache.org/repos/asf/spark/blob/ab4f516f/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala -- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala index 5e55620..01b013f 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala @@ -78,8 +78,8 @@ object Pregel extends Logging { * * @param graph the input graph. 
* - * @param initialMsg the message each vertex will receive at the on - * the first iteration + * @param initialMsg the message each vertex will receive at the first + * iteration * * @param maxIterations the maximum number of iterations to run for * http://git-wip-us.apache.org/repos/asf/spark/blob/ab4f516f/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala -- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala index e139959..ca3b513 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala @@ -25,7 +25,7 @@ import org.apache.spark.graphx._ /** * PageRank algorithm implementation. There are two implementations of PageRank implemented. * - * The first implementation uses the [[Pregel]] interface and runs PageRank for a fixed number + * The first implementation uses the standalone [[Graph]] interface and runs PageRank for a fixed number * of iterations: * {{{ * var PR = Array.fill(n)( 1.0 ) @@ -38,7 +38,7 @@ import org.apache.spark.graphx._ * } * }}} * - * The second implementation uses the standalone [[Graph]] interface and runs PageRank until + * The
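The Scaladoc being corrected describes the fixed-iteration PageRank loop; for readers who want to see it run, here is that loop over plain Scala collections on a hypothetical 4-vertex graph (an illustration only, not the GraphX implementation):

```scala
object PageRankSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical graph as an adjacency list: vertex -> out-neighbours.
    val outLinks = Map(0 -> Seq(1, 2), 1 -> Seq(2), 2 -> Seq(0), 3 -> Seq(2))
    val n = 4
    val resetProb = 0.15
    var pr = Array.fill(n)(1.0)

    for (_ <- 1 to 10) {
      val contrib = Array.fill(n)(0.0)
      for ((src, dsts) <- outLinks; dst <- dsts) {
        contrib(dst) += pr(src) / dsts.size // spread rank along out-edges
      }
      pr = Array.tabulate(n)(i => resetProb + (1.0 - resetProb) * contrib(i))
    }

    pr.zipWithIndex.foreach { case (rank, id) => println(s"vertex $id: $rank") }
  }
}
```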
spark git commit: [SPARK-6453][Mesos] Some Mesos*Suite have a different package with their classes
Repository: spark Updated Branches: refs/heads/master ab4f516fb - adb2ff752 [SPARK-6453][Mesos] Some Mesos*Suite have a different package with their classes - Moved Suites from o.a.s.s.mesos to o.a.s.s.cluster.mesos Author: Jongyoul Lee jongy...@gmail.com Closes #5126 from jongyoul/SPARK-6453 and squashes the following commits: 4f24a3e [Jongyoul Lee] [SPARK-6453][Mesos] Some Mesos*Suite have a different package with their classes - Fixed imports orders 8ab149d [Jongyoul Lee] [SPARK-6453][Mesos] Some Mesos*Suite have a different package with their classes - Moved Suites from o.a.s.s.mesos to o.a.s.s.cluster.mesos Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/adb2ff75 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/adb2ff75 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/adb2ff75 Branch: refs/heads/master Commit: adb2ff752fa8bda54c969b60a3168d87cd70237d Parents: ab4f516 Author: Jongyoul Lee jongy...@gmail.com Authored: Sun Mar 22 15:53:18 2015 + Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 15:54:19 2015 + -- .../mesos/MesosSchedulerBackendSuite.scala | 168 ++ .../mesos/MesosTaskLaunchDataSuite.scala| 36 .../mesos/MesosSchedulerBackendSuite.scala | 169 --- .../mesos/MesosTaskLaunchDataSuite.scala| 38 - 4 files changed, 204 insertions(+), 207 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/adb2ff75/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendSuite.scala new file mode 100644 index 000..f1a4380 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendSuite.scala @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the License); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.scheduler.cluster.mesos + +import java.nio.ByteBuffer +import java.util +import java.util.Collections + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + +import org.apache.mesos.Protos.Value.Scalar +import org.apache.mesos.Protos._ +import org.apache.mesos.SchedulerDriver +import org.mockito.Matchers._ +import org.mockito.Mockito._ +import org.mockito.{ArgumentCaptor, Matchers} +import org.scalatest.FunSuite +import org.scalatest.mock.MockitoSugar + +import org.apache.spark.executor.MesosExecutorBackend +import org.apache.spark.scheduler.cluster.ExecutorInfo +import org.apache.spark.scheduler.{LiveListenerBus, SparkListenerExecutorAdded, + TaskDescription, TaskSchedulerImpl, WorkerOffer} +import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext} + +class MesosSchedulerBackendSuite extends FunSuite with LocalSparkContext with MockitoSugar { + + test(check spark-class location correctly) { +val conf = new SparkConf +conf.set(spark.mesos.executor.home , /mesos-home) + +val listenerBus = mock[LiveListenerBus] +listenerBus.post( + SparkListenerExecutorAdded(anyLong, s1, new ExecutorInfo(host1, 2, Map.empty))) + +val sc = mock[SparkContext] +when(sc.getSparkHome()).thenReturn(Option(/spark-home)) + +when(sc.conf).thenReturn(conf) +when(sc.executorEnvs).thenReturn(new mutable.HashMap[String, String]) +when(sc.executorMemory).thenReturn(100) +when(sc.listenerBus).thenReturn(listenerBus) +val taskScheduler = mock[TaskSchedulerImpl] +when(taskScheduler.CPUS_PER_TASK).thenReturn(2) + +val mesosSchedulerBackend = new MesosSchedulerBackend(taskScheduler, sc, master) + +// uri is null. +val executorInfo = mesosSchedulerBackend.createExecutorInfo(test-id) +assert(executorInfo.getCommand.getValue === s /mesos-home/bin/spark-class ${classOf[MesosExecutorBackend].getName}) + +// uri exists. +
spark git commit: [HOTFIX] Build break due to https://github.com/apache/spark/pull/5128
Repository: spark Updated Branches: refs/heads/master a41b9c600 - 7a0da4770 [HOTFIX] Build break due to https://github.com/apache/spark/pull/5128 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7a0da477 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7a0da477 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7a0da477 Branch: refs/heads/master Commit: 7a0da47708b0e6b117b5c1a35aa3e93b8a914d5f Parents: a41b9c6 Author: Reynold Xin r...@databricks.com Authored: Sun Mar 22 12:08:15 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Sun Mar 22 12:08:15 2015 -0700 -- graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7a0da477/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala -- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala index ca3b513..570440b 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala @@ -25,8 +25,8 @@ import org.apache.spark.graphx._ /** * PageRank algorithm implementation. There are two implementations of PageRank implemented. * - * The first implementation uses the standalone [[Graph]] interface and runs PageRank for a fixed number - * of iterations: + * The first implementation uses the standalone [[Graph]] interface and runs PageRank + * for a fixed number of iterations: * {{{ * var PR = Array.fill(n)( 1.0 ) * val oldPR = Array.fill(n)( 1.0 ) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-6337][Documentation, SQL]Spark 1.3 doc fixes
Repository: spark Updated Branches: refs/heads/branch-1.3 3ba295fa0 - 857e8a60e [SPARK-6337][Documentation, SQL]Spark 1.3 doc fixes Author: vinodkc vinod.kc...@gmail.com Closes #5112 from vinodkc/spark_1.3_doc_fixes and squashes the following commits: 2c6aee6 [vinodkc] Spark 1.3 doc fixes (cherry picked from commit 2bf40c58e6e89e061783c999204107069df17f73) Signed-off-by: Sean Owen so...@cloudera.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/857e8a60 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/857e8a60 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/857e8a60 Branch: refs/heads/branch-1.3 Commit: 857e8a60ed43881ecb27d2e5c33915a8d128b6e8 Parents: 3ba295f Author: vinodkc vinod.kc...@gmail.com Authored: Sun Mar 22 20:00:08 2015 + Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 20:00:19 2015 + -- docs/sql-programming-guide.md| 7 +-- mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala | 2 +- sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/857e8a60/docs/sql-programming-guide.md -- diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index a7d3574..6a333fd 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -509,8 +509,11 @@ val people = sc.textFile(examples/src/main/resources/people.txt) // The schema is encoded in a string val schemaString = name age -// Import Spark SQL data types and Row. -import org.apache.spark.sql._ +// Import Row. +import org.apache.spark.sql.Row; + +// Import Spark SQL data types +import org.apache.spark.sql.types.{StructType,StructField,StringType}; // Generate the schema based on the string of schema val schema = http://git-wip-us.apache.org/repos/asf/spark/blob/857e8a60/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala index 5bbcd2e..c4a3610 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.types.StructType abstract class PipelineStage extends Serializable with Logging { /** - * :: DeveloperAPI :: + * :: DeveloperApi :: * * Derives the output schema from the input schema and parameters. * The schema describes the columns and types of the data. http://git-wip-us.apache.org/repos/asf/spark/blob/857e8a60/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 8b8f86c..5aece16 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -89,7 +89,7 @@ private[sql] object DataFrame { * val people = sqlContext.parquetFile(...) * val department = sqlContext.parquetFile(...) * - * people.filter(age 30) + * people.filter(age 30) * .join(department, people(deptId) === department(id)) * .groupBy(department(name), gender) * .agg(avg(people(salary)), max(people(age))) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
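The corrected guide snippet replaces the wildcard import with an explicit `Row` import plus the type classes from `org.apache.spark.sql.types`. In context, the programmatic-schema example it belongs to reads roughly as follows (a sketch assuming an existing `sc` and `sqlContext`, as the guide does):

```scala
// Import Row and the Spark SQL data types, as the corrected guide snippet does.
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{StructType, StructField, StringType}

// Assumes an existing SparkContext `sc` and SQLContext `sqlContext`.
val people = sc.textFile("examples/src/main/resources/people.txt")

// The schema is encoded in a string.
val schemaString = "name age"

// Generate the schema described by schemaString.
val schema = StructType(
  schemaString.split(" ").map(fieldName => StructField(fieldName, StringType, nullable = true)))

// Convert each line to a Row and apply the schema.
val rowRDD = people.map(_.split(",")).map(p => Row(p(0), p(1).trim))
val peopleDataFrame = sqlContext.createDataFrame(rowRDD, schema)
peopleDataFrame.registerTempTable("people")
```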
spark git commit: [SPARK-6337][Documentation, SQL]Spark 1.3 doc fixes
Repository: spark Updated Branches: refs/heads/master 7a0da4770 - 2bf40c58e [SPARK-6337][Documentation, SQL]Spark 1.3 doc fixes Author: vinodkc vinod.kc...@gmail.com Closes #5112 from vinodkc/spark_1.3_doc_fixes and squashes the following commits: 2c6aee6 [vinodkc] Spark 1.3 doc fixes Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2bf40c58 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2bf40c58 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2bf40c58 Branch: refs/heads/master Commit: 2bf40c58e6e89e061783c999204107069df17f73 Parents: 7a0da47 Author: vinodkc vinod.kc...@gmail.com Authored: Sun Mar 22 20:00:08 2015 + Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 20:00:08 2015 + -- docs/sql-programming-guide.md| 7 +-- mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala | 2 +- sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2bf40c58/docs/sql-programming-guide.md -- diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index a7d3574..6a333fd 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -509,8 +509,11 @@ val people = sc.textFile(examples/src/main/resources/people.txt) // The schema is encoded in a string val schemaString = name age -// Import Spark SQL data types and Row. -import org.apache.spark.sql._ +// Import Row. +import org.apache.spark.sql.Row; + +// Import Spark SQL data types +import org.apache.spark.sql.types.{StructType,StructField,StringType}; // Generate the schema based on the string of schema val schema = http://git-wip-us.apache.org/repos/asf/spark/blob/2bf40c58/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala index 5bbcd2e..c4a3610 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.types.StructType abstract class PipelineStage extends Serializable with Logging { /** - * :: DeveloperAPI :: + * :: DeveloperApi :: * * Derives the output schema from the input schema and parameters. * The schema describes the columns and types of the data. http://git-wip-us.apache.org/repos/asf/spark/blob/2bf40c58/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 8b8f86c..5aece16 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -89,7 +89,7 @@ private[sql] object DataFrame { * val people = sqlContext.parquetFile(...) * val department = sqlContext.parquetFile(...) * - * people.filter(age 30) + * people.filter(age 30) * .join(department, people(deptId) === department(id)) * .groupBy(department(name), gender) * .agg(avg(people(salary)), max(people(age))) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-6122][Core] Upgrade Tachyon client version to 0.6.1.
Repository: spark Updated Branches: refs/heads/master 6ef48632f - a41b9c600 [SPARK-6122][Core] Upgrade Tachyon client version to 0.6.1. Changes the Tachyon client version from 0.5 to 0.6 in spark core and distribution script. New dependencies in Tachyon 0.6.0 include commons-codec:commons-codec:jar:1.5:compile io.netty:netty-all:jar:4.0.23.Final:compile These are already in spark core. Author: Calvin Jia jia.cal...@gmail.com Closes #4867 from calvinjia/upgrade_tachyon_0.6.0 and squashes the following commits: eed9230 [Calvin Jia] Update tachyon version to 0.6.1. 11907b3 [Calvin Jia] Use TachyonURI for tachyon paths instead of strings. 71bf441 [Calvin Jia] Upgrade Tachyon client version to 0.6.0. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a41b9c60 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a41b9c60 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a41b9c60 Branch: refs/heads/master Commit: a41b9c6004cfee84bd56dfa1faf5a0cf084551ae Parents: 6ef4863 Author: Calvin Jia jia.cal...@gmail.com Authored: Sun Mar 22 11:11:29 2015 -0700 Committer: Aaron Davidson aa...@databricks.com Committed: Sun Mar 22 11:11:29 2015 -0700 -- core/pom.xml| 2 +- .../spark/storage/TachyonBlockManager.scala | 27 ++-- .../scala/org/apache/spark/util/Utils.scala | 4 ++- make-distribution.sh| 2 +- 4 files changed, 18 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a41b9c60/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 6cd1965..868834d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -275,7 +275,7 @@ dependency groupIdorg.tachyonproject/groupId artifactIdtachyon-client/artifactId - version0.5.0/version + version0.6.1/version exclusions exclusion groupIdorg.apache.hadoop/groupId http://git-wip-us.apache.org/repos/asf/spark/blob/a41b9c60/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala index af87303..2ab6a8f 100644 --- a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala @@ -20,8 +20,8 @@ package org.apache.spark.storage import java.text.SimpleDateFormat import java.util.{Date, Random} -import tachyon.client.TachyonFS -import tachyon.client.TachyonFile +import tachyon.TachyonURI +import tachyon.client.{TachyonFile, TachyonFS} import org.apache.spark.Logging import org.apache.spark.executor.ExecutorExitCode @@ -40,7 +40,7 @@ private[spark] class TachyonBlockManager( val master: String) extends Logging { - val client = if (master != null master != ) TachyonFS.get(master) else null + val client = if (master != null master != ) TachyonFS.get(new TachyonURI(master)) else null if (client == null) { logError(Failed to connect to the Tachyon as the master address is not configured) @@ -60,11 +60,11 @@ private[spark] class TachyonBlockManager( addShutdownHook() def removeFile(file: TachyonFile): Boolean = { -client.delete(file.getPath(), false) +client.delete(new TachyonURI(file.getPath()), false) } def fileExists(file: TachyonFile): Boolean = { -client.exist(file.getPath()) +client.exist(new TachyonURI(file.getPath())) } def getFile(filename: String): TachyonFile = { @@ -81,7 +81,7 @@ private[spark] class TachyonBlockManager( if (old != null) { old } else { - val path = tachyonDirs(dirId) + / + %02x.format(subDirId) 
+ val path = new TachyonURI(s${tachyonDirs(dirId)}/${%02x.format(subDirId)}) client.mkdir(path) val newDir = client.getFile(path) subDirs(dirId)(subDirId) = newDir @@ -89,7 +89,7 @@ private[spark] class TachyonBlockManager( } } } -val filePath = subDir + / + filename +val filePath = new TachyonURI(s$subDir/$filename) if(!client.exist(filePath)) { client.createFile(filePath) } @@ -101,7 +101,7 @@ private[spark] class TachyonBlockManager( // TODO: Some of the logic here could be consolidated/de-duplicated with that in the DiskStore. private def createTachyonDirs(): Array[TachyonFile] = { -logDebug(Creating tachyon directories at root dirs ' + rootDirs + ') +logDebug(sCreating tachyon directories at root dirs '$rootDirs') val dateFormat = new
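The user-visible part of the upgrade is that tachyon-client 0.6.x takes `TachyonURI` arguments where 0.5.0 took plain path strings. A condensed sketch of that pattern, simplified from the `TachyonBlockManager` changes above (not a drop-in replacement):

```scala
import tachyon.TachyonURI
import tachyon.client.{TachyonFile, TachyonFS}

// Paths are wrapped in TachyonURI instead of being passed as concatenated strings.
object TachyonPathSketch {
  def connect(master: String): TachyonFS = TachyonFS.get(new TachyonURI(master))

  def getOrCreateFile(client: TachyonFS, dir: String, filename: String): TachyonFile = {
    val filePath = new TachyonURI(s"$dir/$filename") // was: dir + "/" + filename
    if (!client.exist(filePath)) {
      client.createFile(filePath)
    }
    client.getFile(filePath)
  }
}
```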
spark git commit: [SPARK-4985] [SQL] parquet support for date type
Repository: spark Updated Branches: refs/heads/branch-1.3 857e8a60e -> 60b9b96b2

[SPARK-4985] [SQL] parquet support for date type

This PR might have some issues with #3732, and it would have merge conflicts with #3820, so the review can be delayed until those two are merged.

Author: Daoyuan Wang daoyuan.w...@intel.com

Closes #3822 from adrian-wang/parquetdate and squashes the following commits:

2c5d54d [Daoyuan Wang] add a test case
faef887 [Daoyuan Wang] parquet support for primitive date
97e9080 [Daoyuan Wang] parquet support for date type

(cherry picked from commit 4659468f369d69e7f777130e5e3b4c5d47a624f1)
Signed-off-by: Cheng Lian l...@databricks.com

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/60b9b96b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/60b9b96b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/60b9b96b

Branch: refs/heads/branch-1.3
Commit: 60b9b96b227ff1821530e29210cc0c338ce97528
Parents: 857e8a6
Author: Daoyuan Wang daoyuan.w...@intel.com
Authored: Mon Mar 23 11:46:16 2015 +0800
Committer: Cheng Lian l...@databricks.com
Committed: Mon Mar 23 11:46:36 2015 +0800

--
 .../apache/spark/sql/parquet/ParquetConverter.scala  | 12
 .../spark/sql/parquet/ParquetTableSupport.scala      |  2 ++
 .../org/apache/spark/sql/parquet/ParquetTypes.scala  |  4
 .../apache/spark/sql/parquet/ParquetIOSuite.scala    | 15 +++
 .../spark/sql/parquet/ParquetSchemaSuite.scala       |  3 ++-
 5 files changed, 35 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/60b9b96b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala
index f898e4b..43ca359 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala
@@ -127,6 +127,12 @@ private[sql] object CatalystConverter {
           parent.updateByte(fieldIndex, value.asInstanceOf[ByteType.JvmType])
       }
     }
+    case DateType => {
+      new CatalystPrimitiveConverter(parent, fieldIndex) {
+        override def addInt(value: Int): Unit =
+          parent.updateDate(fieldIndex, value.asInstanceOf[DateType.JvmType])
+      }
+    }
     case d: DecimalType => {
       new CatalystPrimitiveConverter(parent, fieldIndex) {
         override def addBinary(value: Binary): Unit =
@@ -192,6 +198,9 @@ private[parquet] abstract class CatalystConverter extends GroupConverter {
   protected[parquet] def updateInt(fieldIndex: Int, value: Int): Unit =
     updateField(fieldIndex, value)

+  protected[parquet] def updateDate(fieldIndex: Int, value: Int): Unit =
+    updateField(fieldIndex, value)
+
   protected[parquet] def updateLong(fieldIndex: Int, value: Long): Unit =
     updateField(fieldIndex, value)

@@ -388,6 +397,9 @@ private[parquet] class CatalystPrimitiveRowConverter(
   override protected[parquet] def updateInt(fieldIndex: Int, value: Int): Unit =
     current.setInt(fieldIndex, value)

+  override protected[parquet] def updateDate(fieldIndex: Int, value: Int): Unit =
+    current.update(fieldIndex, value)
+
   override protected[parquet] def updateLong(fieldIndex: Int, value: Long): Unit =
     current.setLong(fieldIndex, value)

http://git-wip-us.apache.org/repos/asf/spark/blob/60b9b96b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala
index 19bfba3..5a1b154 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala
@@ -212,6 +212,7 @@ private[parquet] class RowWriteSupport extends WriteSupport[Row] with Logging {
     case DoubleType => writer.addDouble(value.asInstanceOf[Double])
     case FloatType => writer.addFloat(value.asInstanceOf[Float])
     case BooleanType => writer.addBoolean(value.asInstanceOf[Boolean])
+    case DateType => writer.addInteger(value.asInstanceOf[Int])
     case d: DecimalType =>
       if (d.precisionInfo == None || d.precisionInfo.get.precision > 18) {
         sys.error(s"Unsupported datatype $d, cannot write to consumer")
@@ -358,6 +359,7 @@ private[parquet] class MutableRowWriteSupport extends RowWriteSupport {
     case
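The patch carries DateType values through Parquet as plain Ints: the write path emits them with addInteger and the read path receives them through addInt/updateDate. A minimal standalone sketch of that encoding, assuming the Int is a day count since the Unix epoch; the helper names below are illustrative and not part of the Spark code base:

  import java.time.LocalDate
  import java.time.temporal.ChronoUnit

  // Illustrative only: the Int that would land in a Parquet INT32 column for a date,
  // under the assumption that dates are stored as "days since 1970-01-01".
  object DateAsInt {
    private val Epoch = LocalDate.of(1970, 1, 1)

    // Encode a date as its day offset from the epoch.
    def toDays(date: LocalDate): Int = ChronoUnit.DAYS.between(Epoch, date).toInt

    // Decode the stored Int back into a date.
    def fromDays(days: Int): LocalDate = LocalDate.ofEpochDay(days.toLong)

    def main(args: Array[String]): Unit = {
      val days = toDays(LocalDate.of(2015, 3, 23))
      println(s"$days -> ${fromDays(days)}")  // 16517 -> 2015-03-23
    }
  }

Because the column is a plain INT32 on disk, the converter only has to forward the integer to the row (updateDate/setInt); no extra decoding happens inside the Parquet layer.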
spark git commit: [SPARK-6397][SQL] Check the missingInput simply
Repository: spark Updated Branches: refs/heads/master 4659468f3 -> e566fe598

[SPARK-6397][SQL] Check the missingInput simply

Author: q00251598 qiyad...@huawei.com

Closes #5082 from watermen/sql-missingInput and squashes the following commits:

25766b9 [q00251598] Check the missingInput simply

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e566fe59
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e566fe59
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e566fe59

Branch: refs/heads/master
Commit: e566fe5982bac5d24e6be76e5d7d6270544a85e6
Parents: 4659468
Author: q00251598 qiyad...@huawei.com
Authored: Mon Mar 23 12:06:13 2015 +0800
Committer: Cheng Lian l...@databricks.com
Committed: Mon Mar 23 12:06:13 2015 +0800

--
 .../org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala   | 5 ++---
 .../spark/sql/catalyst/plans/logical/basicOperators.scala        | 2 ++
 2 files changed, 4 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/e566fe59/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 4e8fc89..fb975ee 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -85,9 +85,8 @@ class CheckAnalysis {

             cleaned.foreach(checkValidAggregateExpression)

-          case o if o.children.nonEmpty &&
-            !o.references.filter(_.name != "grouping__id").subsetOf(o.inputSet) =>
-            val missingAttributes = (o.references -- o.inputSet).map(_.prettyString).mkString(",")
+          case o if o.children.nonEmpty && o.missingInput.nonEmpty =>
+            val missingAttributes = o.missingInput.map(_.prettyString).mkString(",")
             val input = o.inputSet.map(_.prettyString).mkString(",")

             failAnalysis(s"resolved attributes $missingAttributes missing from $input")

http://git-wip-us.apache.org/repos/asf/spark/blob/e566fe59/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
index 384fe53..a94b2d2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
@@ -191,6 +191,8 @@ case class Expand(
     val sizeInBytes = child.statistics.sizeInBytes * projections.length
     Statistics(sizeInBytes = sizeInBytes)
   }
+
+  override def missingInput = super.missingInput.filter(_.name != VirtualColumn.groupingIdName)
 }

 trait GroupingAnalytics extends UnaryNode {
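The new guard, o.children.nonEmpty && o.missingInput.nonEmpty, replaces the hand-rolled subset test: an operator's missingInput is essentially the attributes it references minus the attributes its children provide, with Expand now excluding the internal grouping__id column itself rather than CheckAnalysis filtering it by name. A rough sketch of that set arithmetic using plain attribute-name sets; this is illustrative only and not the real Catalyst AttributeSet API:

  // Illustrative only: models missingInput as ordinary Scala sets of attribute names.
  object MissingInputSketch {
    def missingInput(references: Set[String], inputSet: Set[String]): Set[String] =
      references -- inputSet

    def main(args: Array[String]): Unit = {
      val references = Set("a", "b", "grouping__id")
      val inputSet   = Set("a")

      // An Expand-like operator would drop grouping__id from its own missingInput,
      // so the analyzer no longer needs a special case for that name.
      val missing = missingInput(references, inputSet).filterNot(_ == "grouping__id")

      // Analysis fails only when something is genuinely missing.
      if (missing.nonEmpty)
        println(s"resolved attributes ${missing.mkString(",")} missing from ${inputSet.mkString(",")}")
    }
  }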
spark git commit: Revert [SPARK-6397][SQL] Check the missingInput simply
Repository: spark Updated Branches: refs/heads/master e566fe598 -> bf044def4

Revert "[SPARK-6397][SQL] Check the missingInput simply"

This reverts commit e566fe5982bac5d24e6be76e5d7d6270544a85e6.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bf044def
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bf044def
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bf044def

Branch: refs/heads/master
Commit: bf044def4c3a37a0fd4d5e70c2d57685cfd9fd71
Parents: e566fe5
Author: Cheng Lian l...@databricks.com
Authored: Mon Mar 23 12:15:19 2015 +0800
Committer: Cheng Lian l...@databricks.com
Committed: Mon Mar 23 12:15:19 2015 +0800

--
 .../org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala   | 5 +++--
 .../spark/sql/catalyst/plans/logical/basicOperators.scala        | 2 --
 2 files changed, 3 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/bf044def/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index fb975ee..4e8fc89 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -85,8 +85,9 @@ class CheckAnalysis {

             cleaned.foreach(checkValidAggregateExpression)

-          case o if o.children.nonEmpty && o.missingInput.nonEmpty =>
-            val missingAttributes = o.missingInput.map(_.prettyString).mkString(",")
+          case o if o.children.nonEmpty &&
+            !o.references.filter(_.name != "grouping__id").subsetOf(o.inputSet) =>
+            val missingAttributes = (o.references -- o.inputSet).map(_.prettyString).mkString(",")
             val input = o.inputSet.map(_.prettyString).mkString(",")

             failAnalysis(s"resolved attributes $missingAttributes missing from $input")

http://git-wip-us.apache.org/repos/asf/spark/blob/bf044def/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
index a94b2d2..384fe53 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
@@ -191,8 +191,6 @@ case class Expand(
     val sizeInBytes = child.statistics.sizeInBytes * projections.length
     Statistics(sizeInBytes = sizeInBytes)
   }
-
-  override def missingInput = super.missingInput.filter(_.name != VirtualColumn.groupingIdName)
 }

 trait GroupingAnalytics extends UnaryNode {