spark git commit: [SPARK-6408] [SQL] Fix JDBCRDD filtering string literals
Repository: spark Updated Branches: refs/heads/master b6090f902 - 9b1e1f20d [SPARK-6408] [SQL] Fix JDBCRDD filtering string literals Author: ypcat ypc...@gmail.com Author: Pei-Lun Lee pl...@appier.com Closes #5087 from ypcat/spark-6408 and squashes the following commits: 1becc16 [ypcat] [SPARK-6408] [SQL] styling 1bc4455 [ypcat] [SPARK-6408] [SQL] move nested function outside e57fa4a [ypcat] [SPARK-6408] [SQL] fix test case 245ab6f [ypcat] [SPARK-6408] [SQL] add test cases for filtering quoted strings 8962534 [Pei-Lun Lee] [SPARK-6408] [SQL] Fix filtering string literals Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9b1e1f20 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9b1e1f20 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9b1e1f20 Branch: refs/heads/master Commit: 9b1e1f20d4498bda72dd53a832110883a7ca41b5 Parents: b6090f9 Author: ypcat ypc...@gmail.com Authored: Sun Mar 22 15:49:13 2015 +0800 Committer: Cheng Lian l...@databricks.com Committed: Sun Mar 22 15:49:13 2015 +0800 -- .../org/apache/spark/sql/jdbc/JDBCRDD.scala | 19 ++- .../org/apache/spark/sql/jdbc/JDBCSuite.scala| 12 ++-- 2 files changed, 24 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9b1e1f20/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala index 3266b97..76f8593 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.jdbc import java.sql.{Connection, DriverManager, ResultSet, ResultSetMetaData, SQLException} +import org.apache.commons.lang.StringEscapeUtils.escapeSql import org.apache.spark.{Logging, Partition, SparkContext, TaskContext} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions.{Row, SpecificMutableRow} @@ -227,15 +228,23 @@ private[sql] class JDBCRDD( } /** + * Converts value to SQL expression. + */ + private def compileValue(value: Any): Any = value match { +case stringValue: String = s'${escapeSql(stringValue)}' +case _ = value + } + + /** * Turns a single Filter into a String representing a SQL expression. * Returns null for an unhandled filter. 
 */
  private def compileFilter(f: Filter): String = f match {
-    case EqualTo(attr, value) => s"$attr = $value"
-    case LessThan(attr, value) => s"$attr < $value"
-    case GreaterThan(attr, value) => s"$attr > $value"
-    case LessThanOrEqual(attr, value) => s"$attr <= $value"
-    case GreaterThanOrEqual(attr, value) => s"$attr >= $value"
+    case EqualTo(attr, value) => s"$attr = ${compileValue(value)}"
+    case LessThan(attr, value) => s"$attr < ${compileValue(value)}"
+    case GreaterThan(attr, value) => s"$attr > ${compileValue(value)}"
+    case LessThanOrEqual(attr, value) => s"$attr <= ${compileValue(value)}"
+    case GreaterThanOrEqual(attr, value) => s"$attr >= ${compileValue(value)}"
     case _ => null
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/9b1e1f20/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index cd737c0..5eb6ab2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -24,6 +24,7 @@ import java.util.{Calendar, GregorianCalendar}
 import org.apache.spark.sql.test._
 import org.scalatest.{FunSuite, BeforeAndAfter}
 import TestSQLContext._
+import TestSQLContext.implicits._

 class JDBCSuite extends FunSuite with BeforeAndAfter {
   val url = "jdbc:h2:mem:testdb0"
@@ -38,7 +39,7 @@ class JDBCSuite extends FunSuite with BeforeAndAfter {
     conn.prepareStatement("create table test.people (name TEXT(32) NOT NULL, theid INTEGER NOT NULL)").executeUpdate()
     conn.prepareStatement("insert into test.people values ('fred', 1)").executeUpdate()
     conn.prepareStatement("insert into test.people values ('mary', 2)").executeUpdate()
-    conn.prepareStatement("insert into test.people values ('joe', 3)").executeUpdate()
+    conn.prepareStatement("insert into test.people values ('joe ''foo'' \"bar\"', 3)").executeUpdate()
     conn.commit()
     sql(
@@ -129,13 +130,20 @@ class JDBCSuite extends FunSuite with BeforeAndAfter {
     assert(sql("SELECT * FROM foobar WHERE THEID < 1").collect().size == 0)
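The effect of the patch is easiest to see on a concrete filter value. The snippet below is a minimal, self-contained sketch that mirrors `compileValue` and the `EqualTo` branch of `compileFilter` (it is not the JDBCRDD code itself); the only external call, `escapeSql`, is the commons-lang helper the patch imports.

```scala
import org.apache.commons.lang.StringEscapeUtils.escapeSql

object FilterCompilationSketch {
  // Quote and escape string values; pass everything else through unchanged.
  private def compileValue(value: Any): Any = value match {
    case stringValue: String => s"'${escapeSql(stringValue)}'"
    case _ => value
  }

  // Simplified stand-in for the EqualTo branch of compileFilter.
  def compileEqualTo(attr: String, value: Any): String =
    s"$attr = ${compileValue(value)}"

  def main(args: Array[String]): Unit = {
    // Before the patch this produced: NAME = joe 'foo'   (invalid SQL)
    // After the patch it produces:    NAME = 'joe ''foo''' (a proper literal)
    println(compileEqualTo("NAME", "joe 'foo'"))
    println(compileEqualTo("THEID", 3)) // numeric values are left untouched
  }
}
```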
spark git commit: [SPARK-6408] [SQL] Fix JDBCRDD filtering string literals
Repository: spark Updated Branches: refs/heads/branch-1.3 0021d2260 - e60fbf6c4 [SPARK-6408] [SQL] Fix JDBCRDD filtering string literals Author: ypcat ypc...@gmail.com Author: Pei-Lun Lee pl...@appier.com Closes #5087 from ypcat/spark-6408 and squashes the following commits: 1becc16 [ypcat] [SPARK-6408] [SQL] styling 1bc4455 [ypcat] [SPARK-6408] [SQL] move nested function outside e57fa4a [ypcat] [SPARK-6408] [SQL] fix test case 245ab6f [ypcat] [SPARK-6408] [SQL] add test cases for filtering quoted strings 8962534 [Pei-Lun Lee] [SPARK-6408] [SQL] Fix filtering string literals (cherry picked from commit 9b1e1f20d4498bda72dd53a832110883a7ca41b5) Signed-off-by: Cheng Lian l...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e60fbf6c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e60fbf6c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e60fbf6c Branch: refs/heads/branch-1.3 Commit: e60fbf6c4597d0240ab1b472594774beb4f3d391 Parents: 0021d22 Author: ypcat ypc...@gmail.com Authored: Sun Mar 22 15:49:13 2015 +0800 Committer: Cheng Lian l...@databricks.com Committed: Sun Mar 22 15:49:38 2015 +0800 -- .../org/apache/spark/sql/jdbc/JDBCRDD.scala | 19 ++- .../org/apache/spark/sql/jdbc/JDBCSuite.scala| 12 ++-- 2 files changed, 24 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e60fbf6c/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala index 3266b97..76f8593 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.jdbc import java.sql.{Connection, DriverManager, ResultSet, ResultSetMetaData, SQLException} +import org.apache.commons.lang.StringEscapeUtils.escapeSql import org.apache.spark.{Logging, Partition, SparkContext, TaskContext} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions.{Row, SpecificMutableRow} @@ -227,15 +228,23 @@ private[sql] class JDBCRDD( } /** + * Converts value to SQL expression. + */ + private def compileValue(value: Any): Any = value match { +case stringValue: String = s'${escapeSql(stringValue)}' +case _ = value + } + + /** * Turns a single Filter into a String representing a SQL expression. * Returns null for an unhandled filter. 
 */
  private def compileFilter(f: Filter): String = f match {
-    case EqualTo(attr, value) => s"$attr = $value"
-    case LessThan(attr, value) => s"$attr < $value"
-    case GreaterThan(attr, value) => s"$attr > $value"
-    case LessThanOrEqual(attr, value) => s"$attr <= $value"
-    case GreaterThanOrEqual(attr, value) => s"$attr >= $value"
+    case EqualTo(attr, value) => s"$attr = ${compileValue(value)}"
+    case LessThan(attr, value) => s"$attr < ${compileValue(value)}"
+    case GreaterThan(attr, value) => s"$attr > ${compileValue(value)}"
+    case LessThanOrEqual(attr, value) => s"$attr <= ${compileValue(value)}"
+    case GreaterThanOrEqual(attr, value) => s"$attr >= ${compileValue(value)}"
     case _ => null
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/e60fbf6c/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index cd737c0..5eb6ab2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -24,6 +24,7 @@ import java.util.{Calendar, GregorianCalendar}
 import org.apache.spark.sql.test._
 import org.scalatest.{FunSuite, BeforeAndAfter}
 import TestSQLContext._
+import TestSQLContext.implicits._

 class JDBCSuite extends FunSuite with BeforeAndAfter {
   val url = "jdbc:h2:mem:testdb0"
@@ -38,7 +39,7 @@ class JDBCSuite extends FunSuite with BeforeAndAfter {
     conn.prepareStatement("create table test.people (name TEXT(32) NOT NULL, theid INTEGER NOT NULL)").executeUpdate()
     conn.prepareStatement("insert into test.people values ('fred', 1)").executeUpdate()
     conn.prepareStatement("insert into test.people values ('mary', 2)").executeUpdate()
-    conn.prepareStatement("insert into test.people values ('joe', 3)").executeUpdate()
+    conn.prepareStatement("insert into test.people values ('joe ''foo'' \"bar\"', 3)").executeUpdate()
     conn.commit()
     sql(
@@ -129,13 +130,20 @@ class JDBCSuite extends
[2/2] spark git commit: [SPARK-6132] ContextCleaner race condition across SparkContexts
[SPARK-6132] ContextCleaner race condition across SparkContexts The problem is that `ContextCleaner` may clean variables that belong to a different `SparkContext`. This can happen if the `SparkContext` to which the cleaner belongs stops, and a new one is started immediately afterwards in the same JVM. In this case, if the cleaner is in the middle of cleaning a broadcast, for instance, it will do so through `SparkEnv.get.blockManager`, which could be one that belongs to a different `SparkContext`. JoshRosen and I suspect that this is the cause of many flaky tests, most notably the `JavaAPISuite`. We were able to reproduce the failure locally (though it is not deterministic and very hard to reproduce). Author: Andrew Or and...@databricks.com Closes #4869 from andrewor14/cleaner-masquerade and squashes the following commits: 29168c0 [Andrew Or] Synchronize ContextCleaner stop Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/06d883c3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/06d883c3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/06d883c3 Branch: refs/heads/branch-1.2 Commit: 06d883c3735986c88585565d4e66a5231431a4b8 Parents: a2a94a1 Author: Andrew Or and...@databricks.com Authored: Tue Mar 3 13:44:05 2015 -0800 Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 13:05:11 2015 + -- .../scala/org/apache/spark/ContextCleaner.scala | 35 ++-- 1 file changed, 24 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/06d883c3/core/src/main/scala/org/apache/spark/ContextCleaner.scala -- diff --git a/core/src/main/scala/org/apache/spark/ContextCleaner.scala b/core/src/main/scala/org/apache/spark/ContextCleaner.scala index ede1e23..201e5ec 100644 --- a/core/src/main/scala/org/apache/spark/ContextCleaner.scala +++ b/core/src/main/scala/org/apache/spark/ContextCleaner.scala @@ -104,9 +104,19 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { cleaningThread.start() } - /** Stop the cleaner. */ + /** + * Stop the cleaning thread and wait until the thread has finished running its current task. + */ def stop() { stopped = true +// Interrupt the cleaning thread, but wait until the current task has finished before +// doing so. This guards against the race condition where a cleaning thread may +// potentially clean similarly named variables created by a different SparkContext, +// resulting in otherwise inexplicable block-not-found exceptions (SPARK-6132). +synchronized { + cleaningThread.interrupt() +} +cleaningThread.join() } /** Register a RDD for cleanup when it is garbage collected. 
*/ @@ -135,16 +145,19 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { try { val reference = Option(referenceQueue.remove(ContextCleaner.REF_QUEUE_POLL_TIMEOUT)) .map(_.asInstanceOf[CleanupTaskWeakReference]) -reference.map(_.task).foreach { task = - logDebug(Got cleaning task + task) - referenceBuffer -= reference.get - task match { -case CleanRDD(rddId) = - doCleanupRDD(rddId, blocking = blockOnCleanupTasks) -case CleanShuffle(shuffleId) = - doCleanupShuffle(shuffleId, blocking = blockOnShuffleCleanupTasks) -case CleanBroadcast(broadcastId) = - doCleanupBroadcast(broadcastId, blocking = blockOnCleanupTasks) +// Synchronize here to avoid being interrupted on stop() +synchronized { + reference.map(_.task).foreach { task = +logDebug(Got cleaning task + task) +referenceBuffer -= reference.get +task match { + case CleanRDD(rddId) = +doCleanupRDD(rddId, blocking = blockOnCleanupTasks) + case CleanShuffle(shuffleId) = +doCleanupShuffle(shuffleId, blocking = blockOnShuffleCleanupTasks) + case CleanBroadcast(broadcastId) = +doCleanupBroadcast(broadcastId, blocking = blockOnCleanupTasks) +} } } } catch { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
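The synchronization the patch adds is a general stop-a-polling-thread pattern, shown below as a condensed, runnable sketch rather than Spark's actual `ContextCleaner`: the worker holds a lock while it processes a task, and `stop()` takes the same lock before interrupting, so an in-flight cleanup always completes against the context that created it.

```scala
import java.util.concurrent.{LinkedBlockingQueue, TimeUnit}

// Minimal sketch of the locking pattern (illustrative names, not Spark's code).
class CleanerSketch {
  private val queue = new LinkedBlockingQueue[String]()
  @volatile private var stopped = false

  private val cleaningThread = new Thread("cleaner-sketch") {
    override def run(): Unit = {
      while (!stopped) {
        try {
          val task = Option(queue.poll(100, TimeUnit.MILLISECONDS))
          // Synchronize here to avoid being interrupted in the middle of a task.
          CleanerSketch.this.synchronized {
            task.foreach(t => println(s"cleaning $t"))
          }
        } catch {
          // Expected when stop() interrupts the blocking poll; see the companion hotfix commit.
          case _: InterruptedException if stopped =>
        }
      }
    }
  }

  def start(): Unit = cleaningThread.start()
  def submit(task: String): Unit = queue.put(task)

  def stop(): Unit = {
    stopped = true
    // Take the lock first: an in-flight cleanup finishes before the interrupt lands.
    synchronized {
      cleaningThread.interrupt()
    }
    cleaningThread.join()
  }
}
```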
[1/2] spark git commit: [SPARK-6132][HOTFIX] ContextCleaner InterruptedException should be quiet
Repository: spark Updated Branches: refs/heads/branch-1.2 a2a94a154 - abdcec673 [SPARK-6132][HOTFIX] ContextCleaner InterruptedException should be quiet If the cleaner is stopped, we shouldn't print a huge stack trace when the cleaner thread is interrupted because we purposefully did this. Author: Andrew Or and...@databricks.com Closes #4882 from andrewor14/cleaner-interrupt and squashes the following commits: 8652120 [Andrew Or] Just a hot fix Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/abdcec67 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/abdcec67 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/abdcec67 Branch: refs/heads/branch-1.2 Commit: abdcec673f722151cfde59111cf38a0842cc11ba Parents: 06d883c Author: Andrew Or and...@databricks.com Authored: Tue Mar 3 20:49:45 2015 -0800 Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 13:05:11 2015 + -- core/src/main/scala/org/apache/spark/ContextCleaner.scala | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/abdcec67/core/src/main/scala/org/apache/spark/ContextCleaner.scala -- diff --git a/core/src/main/scala/org/apache/spark/ContextCleaner.scala b/core/src/main/scala/org/apache/spark/ContextCleaner.scala index 201e5ec..98e4401 100644 --- a/core/src/main/scala/org/apache/spark/ContextCleaner.scala +++ b/core/src/main/scala/org/apache/spark/ContextCleaner.scala @@ -161,6 +161,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { } } } catch { +case ie: InterruptedException if stopped = // ignore case e: Exception = logError(Error in cleaning thread, e) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
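In isolation, the guard added here behaves like the sketch below (illustrative names, not the `ContextCleaner` code): an interrupt that arrives after shutdown was requested is expected and swallowed, while any other failure still surfaces with its stack trace.

```scala
object QuietShutdownSketch {
  @volatile private var stopped = false

  def main(args: Array[String]): Unit = {
    val worker = new Thread {
      override def run(): Unit = {
        try {
          Thread.sleep(60000) // stands in for the blocking reference-queue poll
        } catch {
          // Interrupt after stop was requested: expected, so stay quiet.
          case _: InterruptedException if stopped =>
          // Anything else is still a real error and keeps its stack trace.
          case e: Exception => e.printStackTrace()
        }
      }
    }
    worker.start()
    stopped = true     // request shutdown first...
    worker.interrupt() // ...then interrupt, so the guard above matches
    worker.join()
  }
}
```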
spark git commit: [SPARK-6448] Make history server log parse exceptions
Repository: spark Updated Branches: refs/heads/master 9b1e1f20d - b9fe504b4 [SPARK-6448] Make history server log parse exceptions This helped me to debug a parse error that was due to the event log format changing recently. Author: Ryan Williams ryan.blake.willi...@gmail.com Closes #5122 from ryan-williams/histerror and squashes the following commits: 5831656 [Ryan Williams] line length c3742ae [Ryan Williams] Make history server log parse exceptions Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b9fe504b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b9fe504b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b9fe504b Branch: refs/heads/master Commit: b9fe504b497cfa509310b4045de4873739c76667 Parents: 9b1e1f2 Author: Ryan Williams ryan.blake.willi...@gmail.com Authored: Sun Mar 22 11:54:23 2015 + Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 11:54:23 2015 + -- .../scala/org/apache/spark/deploy/history/FsHistoryProvider.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b9fe504b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 7fde020..db7c499 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -233,7 +233,8 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis } catch { case e: Exception = logError( -sException encountered when attempting to load application log ${fileStatus.getPath}) +sException encountered when attempting to load application log ${fileStatus.getPath}, +e) None } }.toSeq.sortWith(compareAppInfo) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
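The change is small but the distinction matters for debugging: logging only a message drops the stack trace of the parse failure, while passing the `Throwable` preserves it. A self-contained sketch of the before/after behaviour, using a hypothetical event-log path and local `logError` stand-ins:

```scala
import java.io.FileNotFoundException

object LogParseErrorSketch {
  def logError(msg: String): Unit = System.err.println(s"ERROR $msg")
  def logError(msg: String, e: Throwable): Unit = {
    System.err.println(s"ERROR $msg")
    e.printStackTrace() // the cause is now visible in the history server log
  }

  def main(args: Array[String]): Unit = {
    val path = "hdfs://host/eventlogs/app-1234" // hypothetical log path
    try {
      throw new FileNotFoundException(path)     // stands in for a failed replay/parse
    } catch {
      case e: Exception =>
        // Before: logError(s"Exception encountered when attempting to load application log $path")
        // After: the exception is passed along as well.
        logError(s"Exception encountered when attempting to load application log $path", e)
    }
  }
}
```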
[1/2] spark git commit: [SPARK-6132] ContextCleaner race condition across SparkContexts
Repository: spark Updated Branches: refs/heads/branch-1.1 c5836816f - 39761f515 [SPARK-6132] ContextCleaner race condition across SparkContexts The problem is that `ContextCleaner` may clean variables that belong to a different `SparkContext`. This can happen if the `SparkContext` to which the cleaner belongs stops, and a new one is started immediately afterwards in the same JVM. In this case, if the cleaner is in the middle of cleaning a broadcast, for instance, it will do so through `SparkEnv.get.blockManager`, which could be one that belongs to a different `SparkContext`. JoshRosen and I suspect that this is the cause of many flaky tests, most notably the `JavaAPISuite`. We were able to reproduce the failure locally (though it is not deterministic and very hard to reproduce). Author: Andrew Or and...@databricks.com Closes #4869 from andrewor14/cleaner-masquerade and squashes the following commits: 29168c0 [Andrew Or] Synchronize ContextCleaner stop Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e445ce61 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e445ce61 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e445ce61 Branch: refs/heads/branch-1.1 Commit: e445ce61d02778beaa30a2d1867e5b06fe09fb5d Parents: c583681 Author: Andrew Or and...@databricks.com Authored: Tue Mar 3 13:44:05 2015 -0800 Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 13:08:14 2015 + -- .../scala/org/apache/spark/ContextCleaner.scala | 35 ++-- 1 file changed, 24 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e445ce61/core/src/main/scala/org/apache/spark/ContextCleaner.scala -- diff --git a/core/src/main/scala/org/apache/spark/ContextCleaner.scala b/core/src/main/scala/org/apache/spark/ContextCleaner.scala index ede1e23..201e5ec 100644 --- a/core/src/main/scala/org/apache/spark/ContextCleaner.scala +++ b/core/src/main/scala/org/apache/spark/ContextCleaner.scala @@ -104,9 +104,19 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { cleaningThread.start() } - /** Stop the cleaner. */ + /** + * Stop the cleaning thread and wait until the thread has finished running its current task. + */ def stop() { stopped = true +// Interrupt the cleaning thread, but wait until the current task has finished before +// doing so. This guards against the race condition where a cleaning thread may +// potentially clean similarly named variables created by a different SparkContext, +// resulting in otherwise inexplicable block-not-found exceptions (SPARK-6132). +synchronized { + cleaningThread.interrupt() +} +cleaningThread.join() } /** Register a RDD for cleanup when it is garbage collected. 
*/ @@ -135,16 +145,19 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { try { val reference = Option(referenceQueue.remove(ContextCleaner.REF_QUEUE_POLL_TIMEOUT)) .map(_.asInstanceOf[CleanupTaskWeakReference]) -reference.map(_.task).foreach { task = - logDebug(Got cleaning task + task) - referenceBuffer -= reference.get - task match { -case CleanRDD(rddId) = - doCleanupRDD(rddId, blocking = blockOnCleanupTasks) -case CleanShuffle(shuffleId) = - doCleanupShuffle(shuffleId, blocking = blockOnShuffleCleanupTasks) -case CleanBroadcast(broadcastId) = - doCleanupBroadcast(broadcastId, blocking = blockOnCleanupTasks) +// Synchronize here to avoid being interrupted on stop() +synchronized { + reference.map(_.task).foreach { task = +logDebug(Got cleaning task + task) +referenceBuffer -= reference.get +task match { + case CleanRDD(rddId) = +doCleanupRDD(rddId, blocking = blockOnCleanupTasks) + case CleanShuffle(shuffleId) = +doCleanupShuffle(shuffleId, blocking = blockOnShuffleCleanupTasks) + case CleanBroadcast(broadcastId) = +doCleanupBroadcast(broadcastId, blocking = blockOnCleanupTasks) +} } } } catch { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[2/2] spark git commit: [SPARK-6132][HOTFIX] ContextCleaner InterruptedException should be quiet
[SPARK-6132][HOTFIX] ContextCleaner InterruptedException should be quiet If the cleaner is stopped, we shouldn't print a huge stack trace when the cleaner thread is interrupted because we purposefully did this. Author: Andrew Or and...@databricks.com Closes #4882 from andrewor14/cleaner-interrupt and squashes the following commits: 8652120 [Andrew Or] Just a hot fix Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/39761f51 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/39761f51 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/39761f51 Branch: refs/heads/branch-1.1 Commit: 39761f515d65afff377873ee4701b9313c317a60 Parents: e445ce6 Author: Andrew Or and...@databricks.com Authored: Tue Mar 3 20:49:45 2015 -0800 Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 13:08:34 2015 + -- core/src/main/scala/org/apache/spark/ContextCleaner.scala | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/39761f51/core/src/main/scala/org/apache/spark/ContextCleaner.scala -- diff --git a/core/src/main/scala/org/apache/spark/ContextCleaner.scala b/core/src/main/scala/org/apache/spark/ContextCleaner.scala index 201e5ec..98e4401 100644 --- a/core/src/main/scala/org/apache/spark/ContextCleaner.scala +++ b/core/src/main/scala/org/apache/spark/ContextCleaner.scala @@ -161,6 +161,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { } } } catch { +case ie: InterruptedException if stopped = // ignore case e: Exception = logError(Error in cleaning thread, e) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: SPARK-6454 [DOCS] Fix links to pyspark api
Repository: spark Updated Branches: refs/heads/branch-1.3 e60fbf6c4 - 3ba295fa0 SPARK-6454 [DOCS] Fix links to pyspark api Author: Kamil Smuga smugaka...@gmail.com Author: stderr smugaka...@gmail.com Closes #5120 from kamilsmuga/master and squashes the following commits: fee3281 [Kamil Smuga] more python api links fixed for docs 13240cb [Kamil Smuga] resolved merge conflicts with upstream/master 6649b3b [Kamil Smuga] fix broken docs links to Python API 92f03d7 [stderr] Fix links to pyspark api (cherry picked from commit 6ef48632fbf3e6659ceacaab1dbb8be8238d4d33) Signed-off-by: Sean Owen so...@cloudera.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3ba295fa Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3ba295fa Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3ba295fa Branch: refs/heads/branch-1.3 Commit: 3ba295fa08d7e57f507319bae621ef7d788a0d23 Parents: e60fbf6 Author: Kamil Smuga smugaka...@gmail.com Authored: Sun Mar 22 15:56:25 2015 + Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 15:57:15 2015 + -- docs/mllib-data-types.md | 8 docs/mllib-naive-bayes.md | 6 +++--- docs/mllib-statistics.md | 10 +- docs/programming-guide.md | 12 ++-- docs/sql-programming-guide.md | 2 +- 5 files changed, 19 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3ba295fa/docs/mllib-data-types.md -- diff --git a/docs/mllib-data-types.md b/docs/mllib-data-types.md index fe6c1bf..4f2a2f7 100644 --- a/docs/mllib-data-types.md +++ b/docs/mllib-data-types.md @@ -78,13 +78,13 @@ MLlib recognizes the following types as dense vectors: and the following as sparse vectors: -* MLlib's [`SparseVector`](api/python/pyspark.mllib.linalg.SparseVector-class.html). +* MLlib's [`SparseVector`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.SparseVector). * SciPy's [`csc_matrix`](http://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csc_matrix.html#scipy.sparse.csc_matrix) with a single column We recommend using NumPy arrays over lists for efficiency, and using the factory methods implemented -in [`Vectors`](api/python/pyspark.mllib.linalg.Vectors-class.html) to create sparse vectors. +in [`Vectors`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Vector) to create sparse vectors. {% highlight python %} import numpy as np @@ -151,7 +151,7 @@ LabeledPoint neg = new LabeledPoint(1.0, Vectors.sparse(3, new int[] {0, 2}, new div data-lang=python markdown=1 A labeled point is represented by -[`LabeledPoint`](api/python/pyspark.mllib.regression.LabeledPoint-class.html). +[`LabeledPoint`](api/python/pyspark.mllib.html#pyspark.mllib.regression.LabeledPoint). {% highlight python %} from pyspark.mllib.linalg import SparseVector @@ -211,7 +211,7 @@ JavaRDDLabeledPoint examples = /div div data-lang=python markdown=1 -[`MLUtils.loadLibSVMFile`](api/python/pyspark.mllib.util.MLUtils-class.html) reads training +[`MLUtils.loadLibSVMFile`](api/python/pyspark.mllib.html#pyspark.mllib.util.MLUtils) reads training examples stored in LIBSVM format. 
{% highlight python %} http://git-wip-us.apache.org/repos/asf/spark/blob/3ba295fa/docs/mllib-naive-bayes.md -- diff --git a/docs/mllib-naive-bayes.md b/docs/mllib-naive-bayes.md index 55b8f2c..a83472f 100644 --- a/docs/mllib-naive-bayes.md +++ b/docs/mllib-naive-bayes.md @@ -106,11 +106,11 @@ NaiveBayesModel sameModel = NaiveBayesModel.load(sc.sc(), myModelPath); div data-lang=python markdown=1 -[NaiveBayes](api/python/pyspark.mllib.classification.NaiveBayes-class.html) implements multinomial +[NaiveBayes](api/python/pyspark.mllib.html#pyspark.mllib.classification.NaiveBayes) implements multinomial naive Bayes. It takes an RDD of -[LabeledPoint](api/python/pyspark.mllib.regression.LabeledPoint-class.html) and an optionally +[LabeledPoint](api/python/pyspark.mllib.html#pyspark.mllib.regression.LabeledPoint) and an optionally smoothing parameter `lambda` as input, and output a -[NaiveBayesModel](api/python/pyspark.mllib.classification.NaiveBayesModel-class.html), which can be +[NaiveBayesModel](api/python/pyspark.mllib.html#pyspark.mllib.classification.NaiveBayesModel), which can be used for evaluation and prediction. Note that the Python API does not yet support model save/load but will in the future. http://git-wip-us.apache.org/repos/asf/spark/blob/3ba295fa/docs/mllib-statistics.md -- diff --git a/docs/mllib-statistics.md b/docs/mllib-statistics.md index ca8c292..887eae7 100644 ---
spark git commit: SPARK-6454 [DOCS] Fix links to pyspark api
Repository: spark Updated Branches: refs/heads/master adb2ff752 - 6ef48632f SPARK-6454 [DOCS] Fix links to pyspark api Author: Kamil Smuga smugaka...@gmail.com Author: stderr smugaka...@gmail.com Closes #5120 from kamilsmuga/master and squashes the following commits: fee3281 [Kamil Smuga] more python api links fixed for docs 13240cb [Kamil Smuga] resolved merge conflicts with upstream/master 6649b3b [Kamil Smuga] fix broken docs links to Python API 92f03d7 [stderr] Fix links to pyspark api Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6ef48632 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6ef48632 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6ef48632 Branch: refs/heads/master Commit: 6ef48632fbf3e6659ceacaab1dbb8be8238d4d33 Parents: adb2ff7 Author: Kamil Smuga smugaka...@gmail.com Authored: Sun Mar 22 15:56:25 2015 + Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 15:56:25 2015 + -- docs/mllib-data-types.md | 8 docs/mllib-naive-bayes.md | 6 +++--- docs/mllib-statistics.md | 10 +- docs/programming-guide.md | 12 ++-- docs/sql-programming-guide.md | 2 +- 5 files changed, 19 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6ef48632/docs/mllib-data-types.md -- diff --git a/docs/mllib-data-types.md b/docs/mllib-data-types.md index fe6c1bf..4f2a2f7 100644 --- a/docs/mllib-data-types.md +++ b/docs/mllib-data-types.md @@ -78,13 +78,13 @@ MLlib recognizes the following types as dense vectors: and the following as sparse vectors: -* MLlib's [`SparseVector`](api/python/pyspark.mllib.linalg.SparseVector-class.html). +* MLlib's [`SparseVector`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.SparseVector). * SciPy's [`csc_matrix`](http://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csc_matrix.html#scipy.sparse.csc_matrix) with a single column We recommend using NumPy arrays over lists for efficiency, and using the factory methods implemented -in [`Vectors`](api/python/pyspark.mllib.linalg.Vectors-class.html) to create sparse vectors. +in [`Vectors`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Vector) to create sparse vectors. {% highlight python %} import numpy as np @@ -151,7 +151,7 @@ LabeledPoint neg = new LabeledPoint(1.0, Vectors.sparse(3, new int[] {0, 2}, new div data-lang=python markdown=1 A labeled point is represented by -[`LabeledPoint`](api/python/pyspark.mllib.regression.LabeledPoint-class.html). +[`LabeledPoint`](api/python/pyspark.mllib.html#pyspark.mllib.regression.LabeledPoint). {% highlight python %} from pyspark.mllib.linalg import SparseVector @@ -211,7 +211,7 @@ JavaRDDLabeledPoint examples = /div div data-lang=python markdown=1 -[`MLUtils.loadLibSVMFile`](api/python/pyspark.mllib.util.MLUtils-class.html) reads training +[`MLUtils.loadLibSVMFile`](api/python/pyspark.mllib.html#pyspark.mllib.util.MLUtils) reads training examples stored in LIBSVM format. 
{% highlight python %} http://git-wip-us.apache.org/repos/asf/spark/blob/6ef48632/docs/mllib-naive-bayes.md -- diff --git a/docs/mllib-naive-bayes.md b/docs/mllib-naive-bayes.md index 55b8f2c..a83472f 100644 --- a/docs/mllib-naive-bayes.md +++ b/docs/mllib-naive-bayes.md @@ -106,11 +106,11 @@ NaiveBayesModel sameModel = NaiveBayesModel.load(sc.sc(), myModelPath); div data-lang=python markdown=1 -[NaiveBayes](api/python/pyspark.mllib.classification.NaiveBayes-class.html) implements multinomial +[NaiveBayes](api/python/pyspark.mllib.html#pyspark.mllib.classification.NaiveBayes) implements multinomial naive Bayes. It takes an RDD of -[LabeledPoint](api/python/pyspark.mllib.regression.LabeledPoint-class.html) and an optionally +[LabeledPoint](api/python/pyspark.mllib.html#pyspark.mllib.regression.LabeledPoint) and an optionally smoothing parameter `lambda` as input, and output a -[NaiveBayesModel](api/python/pyspark.mllib.classification.NaiveBayesModel-class.html), which can be +[NaiveBayesModel](api/python/pyspark.mllib.html#pyspark.mllib.classification.NaiveBayesModel), which can be used for evaluation and prediction. Note that the Python API does not yet support model save/load but will in the future. http://git-wip-us.apache.org/repos/asf/spark/blob/6ef48632/docs/mllib-statistics.md -- diff --git a/docs/mllib-statistics.md b/docs/mllib-statistics.md index ca8c292..887eae7 100644 --- a/docs/mllib-statistics.md +++ b/docs/mllib-statistics.md @@ -81,8 +81,8 @@ System.out.println(summary.numNonzeros()); // number of
spark git commit: [SPARK-6455] [docs] Correct some mistakes and typos
Repository: spark Updated Branches: refs/heads/master b9fe504b4 - ab4f516fb [SPARK-6455] [docs] Correct some mistakes and typos Correct some typos. Correct a mistake in lib/PageRank.scala. The first PageRank implementation uses standalone Graph interface, but the second uses Pregel interface. It may mislead the code viewers. Author: Hangchen Yu y...@gitcafe.com Closes #5128 from yuhc/master and squashes the following commits: 53e5432 [Hangchen Yu] Merge branch 'master' of https://github.com/yuhc/spark 67b77b5 [Hangchen Yu] [SPARK-6455] [docs] Correct some mistakes and typos 206f2dc [Hangchen Yu] Correct some mistakes and typos. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ab4f516f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ab4f516f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ab4f516f Branch: refs/heads/master Commit: ab4f516fbe63e24e076c68f4933a171a72b6f1fd Parents: b9fe504 Author: Hangchen Yu y...@gitcafe.com Authored: Sun Mar 22 15:51:10 2015 + Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 15:51:10 2015 + -- graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala | 4 ++-- graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala | 4 ++-- graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ab4f516f/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala -- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala index dc8b478..86f611d 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala @@ -113,7 +113,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali * Collect the neighbor vertex attributes for each vertex. * * @note This function could be highly inefficient on power-law - * graphs where high degree vertices may force a large ammount of + * graphs where high degree vertices may force a large amount of * information to be collected to a single location. * * @param edgeDirection the direction along which to collect @@ -187,7 +187,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali /** * Join the vertices with an RDD and then apply a function from the - * the vertex and RDD entry to a new vertex value. The input table + * vertex and RDD entry to a new vertex value. The input table * should contain at most one entry for each vertex. If no entry is * provided the map function is skipped and the old value is used. * http://git-wip-us.apache.org/repos/asf/spark/blob/ab4f516f/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala -- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala index 5e55620..01b013f 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala @@ -78,8 +78,8 @@ object Pregel extends Logging { * * @param graph the input graph. 
* - * @param initialMsg the message each vertex will receive at the on - * the first iteration + * @param initialMsg the message each vertex will receive at the first + * iteration * * @param maxIterations the maximum number of iterations to run for * http://git-wip-us.apache.org/repos/asf/spark/blob/ab4f516f/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala -- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala index e139959..ca3b513 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala @@ -25,7 +25,7 @@ import org.apache.spark.graphx._ /** * PageRank algorithm implementation. There are two implementations of PageRank implemented. * - * The first implementation uses the [[Pregel]] interface and runs PageRank for a fixed number + * The first implementation uses the standalone [[Graph]] interface and runs PageRank for a fixed number * of iterations: * {{{ * var PR = Array.fill(n)( 1.0 ) @@ -38,7 +38,7 @@ import org.apache.spark.graphx._ * } * }}} * - * The second implementation uses the standalone [[Graph]] interface and runs PageRank until + * The
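The Scaladoc being corrected describes the fixed-iteration PageRank loop; for readers who want to see it run, here is that loop over plain Scala collections on a hypothetical 4-vertex graph (an illustration only, not the GraphX implementation):

```scala
object PageRankSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical graph as an adjacency list: vertex -> out-neighbours.
    val outLinks = Map(0 -> Seq(1, 2), 1 -> Seq(2), 2 -> Seq(0), 3 -> Seq(2))
    val n = 4
    val resetProb = 0.15
    var pr = Array.fill(n)(1.0)

    for (_ <- 1 to 10) {
      val contrib = Array.fill(n)(0.0)
      for ((src, dsts) <- outLinks; dst <- dsts) {
        contrib(dst) += pr(src) / dsts.size // spread rank along out-edges
      }
      pr = Array.tabulate(n)(i => resetProb + (1.0 - resetProb) * contrib(i))
    }

    pr.zipWithIndex.foreach { case (rank, id) => println(s"vertex $id: $rank") }
  }
}
```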
spark git commit: [SPARK-6453][Mesos] Some Mesos*Suite have a different package with their classes
Repository: spark Updated Branches: refs/heads/master ab4f516fb - adb2ff752 [SPARK-6453][Mesos] Some Mesos*Suite have a different package with their classes - Moved Suites from o.a.s.s.mesos to o.a.s.s.cluster.mesos Author: Jongyoul Lee jongy...@gmail.com Closes #5126 from jongyoul/SPARK-6453 and squashes the following commits: 4f24a3e [Jongyoul Lee] [SPARK-6453][Mesos] Some Mesos*Suite have a different package with their classes - Fixed imports orders 8ab149d [Jongyoul Lee] [SPARK-6453][Mesos] Some Mesos*Suite have a different package with their classes - Moved Suites from o.a.s.s.mesos to o.a.s.s.cluster.mesos Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/adb2ff75 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/adb2ff75 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/adb2ff75 Branch: refs/heads/master Commit: adb2ff752fa8bda54c969b60a3168d87cd70237d Parents: ab4f516 Author: Jongyoul Lee jongy...@gmail.com Authored: Sun Mar 22 15:53:18 2015 + Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 15:54:19 2015 + -- .../mesos/MesosSchedulerBackendSuite.scala | 168 ++ .../mesos/MesosTaskLaunchDataSuite.scala| 36 .../mesos/MesosSchedulerBackendSuite.scala | 169 --- .../mesos/MesosTaskLaunchDataSuite.scala| 38 - 4 files changed, 204 insertions(+), 207 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/adb2ff75/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendSuite.scala new file mode 100644 index 000..f1a4380 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendSuite.scala @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the License); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.scheduler.cluster.mesos + +import java.nio.ByteBuffer +import java.util +import java.util.Collections + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + +import org.apache.mesos.Protos.Value.Scalar +import org.apache.mesos.Protos._ +import org.apache.mesos.SchedulerDriver +import org.mockito.Matchers._ +import org.mockito.Mockito._ +import org.mockito.{ArgumentCaptor, Matchers} +import org.scalatest.FunSuite +import org.scalatest.mock.MockitoSugar + +import org.apache.spark.executor.MesosExecutorBackend +import org.apache.spark.scheduler.cluster.ExecutorInfo +import org.apache.spark.scheduler.{LiveListenerBus, SparkListenerExecutorAdded, + TaskDescription, TaskSchedulerImpl, WorkerOffer} +import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext} + +class MesosSchedulerBackendSuite extends FunSuite with LocalSparkContext with MockitoSugar { + + test(check spark-class location correctly) { +val conf = new SparkConf +conf.set(spark.mesos.executor.home , /mesos-home) + +val listenerBus = mock[LiveListenerBus] +listenerBus.post( + SparkListenerExecutorAdded(anyLong, s1, new ExecutorInfo(host1, 2, Map.empty))) + +val sc = mock[SparkContext] +when(sc.getSparkHome()).thenReturn(Option(/spark-home)) + +when(sc.conf).thenReturn(conf) +when(sc.executorEnvs).thenReturn(new mutable.HashMap[String, String]) +when(sc.executorMemory).thenReturn(100) +when(sc.listenerBus).thenReturn(listenerBus) +val taskScheduler = mock[TaskSchedulerImpl] +when(taskScheduler.CPUS_PER_TASK).thenReturn(2) + +val mesosSchedulerBackend = new MesosSchedulerBackend(taskScheduler, sc, master) + +// uri is null. +val executorInfo = mesosSchedulerBackend.createExecutorInfo(test-id) +assert(executorInfo.getCommand.getValue === s /mesos-home/bin/spark-class ${classOf[MesosExecutorBackend].getName}) + +// uri exists. +
spark git commit: [HOTFIX] Build break due to https://github.com/apache/spark/pull/5128
Repository: spark Updated Branches: refs/heads/master a41b9c600 - 7a0da4770 [HOTFIX] Build break due to https://github.com/apache/spark/pull/5128 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7a0da477 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7a0da477 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7a0da477 Branch: refs/heads/master Commit: 7a0da47708b0e6b117b5c1a35aa3e93b8a914d5f Parents: a41b9c6 Author: Reynold Xin r...@databricks.com Authored: Sun Mar 22 12:08:15 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Sun Mar 22 12:08:15 2015 -0700 -- graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7a0da477/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala -- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala index ca3b513..570440b 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala @@ -25,8 +25,8 @@ import org.apache.spark.graphx._ /** * PageRank algorithm implementation. There are two implementations of PageRank implemented. * - * The first implementation uses the standalone [[Graph]] interface and runs PageRank for a fixed number - * of iterations: + * The first implementation uses the standalone [[Graph]] interface and runs PageRank + * for a fixed number of iterations: * {{{ * var PR = Array.fill(n)( 1.0 ) * val oldPR = Array.fill(n)( 1.0 ) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-6337][Documentation, SQL]Spark 1.3 doc fixes
Repository: spark Updated Branches: refs/heads/branch-1.3 3ba295fa0 - 857e8a60e [SPARK-6337][Documentation, SQL]Spark 1.3 doc fixes Author: vinodkc vinod.kc...@gmail.com Closes #5112 from vinodkc/spark_1.3_doc_fixes and squashes the following commits: 2c6aee6 [vinodkc] Spark 1.3 doc fixes (cherry picked from commit 2bf40c58e6e89e061783c999204107069df17f73) Signed-off-by: Sean Owen so...@cloudera.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/857e8a60 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/857e8a60 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/857e8a60 Branch: refs/heads/branch-1.3 Commit: 857e8a60ed43881ecb27d2e5c33915a8d128b6e8 Parents: 3ba295f Author: vinodkc vinod.kc...@gmail.com Authored: Sun Mar 22 20:00:08 2015 + Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 20:00:19 2015 + -- docs/sql-programming-guide.md| 7 +-- mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala | 2 +- sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/857e8a60/docs/sql-programming-guide.md -- diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index a7d3574..6a333fd 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -509,8 +509,11 @@ val people = sc.textFile(examples/src/main/resources/people.txt) // The schema is encoded in a string val schemaString = name age -// Import Spark SQL data types and Row. -import org.apache.spark.sql._ +// Import Row. +import org.apache.spark.sql.Row; + +// Import Spark SQL data types +import org.apache.spark.sql.types.{StructType,StructField,StringType}; // Generate the schema based on the string of schema val schema = http://git-wip-us.apache.org/repos/asf/spark/blob/857e8a60/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala index 5bbcd2e..c4a3610 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.types.StructType abstract class PipelineStage extends Serializable with Logging { /** - * :: DeveloperAPI :: + * :: DeveloperApi :: * * Derives the output schema from the input schema and parameters. * The schema describes the columns and types of the data. http://git-wip-us.apache.org/repos/asf/spark/blob/857e8a60/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 8b8f86c..5aece16 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -89,7 +89,7 @@ private[sql] object DataFrame { * val people = sqlContext.parquetFile(...) * val department = sqlContext.parquetFile(...) * - * people.filter(age 30) + * people.filter(age 30) * .join(department, people(deptId) === department(id)) * .groupBy(department(name), gender) * .agg(avg(people(salary)), max(people(age))) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
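The corrected guide snippet replaces the wildcard import with an explicit `Row` import plus the type classes from `org.apache.spark.sql.types`. In context, the programmatic-schema example it belongs to reads roughly as follows (a sketch assuming an existing `sc` and `sqlContext`, as the guide does):

```scala
// Import Row and the Spark SQL data types, as the corrected guide snippet does.
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{StructType, StructField, StringType}

// Assumes an existing SparkContext `sc` and SQLContext `sqlContext`.
val people = sc.textFile("examples/src/main/resources/people.txt")

// The schema is encoded in a string.
val schemaString = "name age"

// Generate the schema described by schemaString.
val schema = StructType(
  schemaString.split(" ").map(fieldName => StructField(fieldName, StringType, nullable = true)))

// Convert each line to a Row and apply the schema.
val rowRDD = people.map(_.split(",")).map(p => Row(p(0), p(1).trim))
val peopleDataFrame = sqlContext.createDataFrame(rowRDD, schema)
peopleDataFrame.registerTempTable("people")
```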
spark git commit: [SPARK-6337][Documentation, SQL]Spark 1.3 doc fixes
Repository: spark Updated Branches: refs/heads/master 7a0da4770 - 2bf40c58e [SPARK-6337][Documentation, SQL]Spark 1.3 doc fixes Author: vinodkc vinod.kc...@gmail.com Closes #5112 from vinodkc/spark_1.3_doc_fixes and squashes the following commits: 2c6aee6 [vinodkc] Spark 1.3 doc fixes Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2bf40c58 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2bf40c58 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2bf40c58 Branch: refs/heads/master Commit: 2bf40c58e6e89e061783c999204107069df17f73 Parents: 7a0da47 Author: vinodkc vinod.kc...@gmail.com Authored: Sun Mar 22 20:00:08 2015 + Committer: Sean Owen so...@cloudera.com Committed: Sun Mar 22 20:00:08 2015 + -- docs/sql-programming-guide.md| 7 +-- mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala | 2 +- sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2bf40c58/docs/sql-programming-guide.md -- diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index a7d3574..6a333fd 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -509,8 +509,11 @@ val people = sc.textFile(examples/src/main/resources/people.txt) // The schema is encoded in a string val schemaString = name age -// Import Spark SQL data types and Row. -import org.apache.spark.sql._ +// Import Row. +import org.apache.spark.sql.Row; + +// Import Spark SQL data types +import org.apache.spark.sql.types.{StructType,StructField,StringType}; // Generate the schema based on the string of schema val schema = http://git-wip-us.apache.org/repos/asf/spark/blob/2bf40c58/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala index 5bbcd2e..c4a3610 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.types.StructType abstract class PipelineStage extends Serializable with Logging { /** - * :: DeveloperAPI :: + * :: DeveloperApi :: * * Derives the output schema from the input schema and parameters. * The schema describes the columns and types of the data. http://git-wip-us.apache.org/repos/asf/spark/blob/2bf40c58/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 8b8f86c..5aece16 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -89,7 +89,7 @@ private[sql] object DataFrame { * val people = sqlContext.parquetFile(...) * val department = sqlContext.parquetFile(...) * - * people.filter(age 30) + * people.filter(age 30) * .join(department, people(deptId) === department(id)) * .groupBy(department(name), gender) * .agg(avg(people(salary)), max(people(age))) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-6122][Core] Upgrade Tachyon client version to 0.6.1.
Repository: spark Updated Branches: refs/heads/master 6ef48632f - a41b9c600 [SPARK-6122][Core] Upgrade Tachyon client version to 0.6.1. Changes the Tachyon client version from 0.5 to 0.6 in spark core and distribution script. New dependencies in Tachyon 0.6.0 include commons-codec:commons-codec:jar:1.5:compile io.netty:netty-all:jar:4.0.23.Final:compile These are already in spark core. Author: Calvin Jia jia.cal...@gmail.com Closes #4867 from calvinjia/upgrade_tachyon_0.6.0 and squashes the following commits: eed9230 [Calvin Jia] Update tachyon version to 0.6.1. 11907b3 [Calvin Jia] Use TachyonURI for tachyon paths instead of strings. 71bf441 [Calvin Jia] Upgrade Tachyon client version to 0.6.0. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a41b9c60 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a41b9c60 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a41b9c60 Branch: refs/heads/master Commit: a41b9c6004cfee84bd56dfa1faf5a0cf084551ae Parents: 6ef4863 Author: Calvin Jia jia.cal...@gmail.com Authored: Sun Mar 22 11:11:29 2015 -0700 Committer: Aaron Davidson aa...@databricks.com Committed: Sun Mar 22 11:11:29 2015 -0700 -- core/pom.xml| 2 +- .../spark/storage/TachyonBlockManager.scala | 27 ++-- .../scala/org/apache/spark/util/Utils.scala | 4 ++- make-distribution.sh| 2 +- 4 files changed, 18 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a41b9c60/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 6cd1965..868834d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -275,7 +275,7 @@ dependency groupIdorg.tachyonproject/groupId artifactIdtachyon-client/artifactId - version0.5.0/version + version0.6.1/version exclusions exclusion groupIdorg.apache.hadoop/groupId http://git-wip-us.apache.org/repos/asf/spark/blob/a41b9c60/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala index af87303..2ab6a8f 100644 --- a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala @@ -20,8 +20,8 @@ package org.apache.spark.storage import java.text.SimpleDateFormat import java.util.{Date, Random} -import tachyon.client.TachyonFS -import tachyon.client.TachyonFile +import tachyon.TachyonURI +import tachyon.client.{TachyonFile, TachyonFS} import org.apache.spark.Logging import org.apache.spark.executor.ExecutorExitCode @@ -40,7 +40,7 @@ private[spark] class TachyonBlockManager( val master: String) extends Logging { - val client = if (master != null master != ) TachyonFS.get(master) else null + val client = if (master != null master != ) TachyonFS.get(new TachyonURI(master)) else null if (client == null) { logError(Failed to connect to the Tachyon as the master address is not configured) @@ -60,11 +60,11 @@ private[spark] class TachyonBlockManager( addShutdownHook() def removeFile(file: TachyonFile): Boolean = { -client.delete(file.getPath(), false) +client.delete(new TachyonURI(file.getPath()), false) } def fileExists(file: TachyonFile): Boolean = { -client.exist(file.getPath()) +client.exist(new TachyonURI(file.getPath())) } def getFile(filename: String): TachyonFile = { @@ -81,7 +81,7 @@ private[spark] class TachyonBlockManager( if (old != null) { old } else { - val path = tachyonDirs(dirId) + / + %02x.format(subDirId) 
+ val path = new TachyonURI(s${tachyonDirs(dirId)}/${%02x.format(subDirId)}) client.mkdir(path) val newDir = client.getFile(path) subDirs(dirId)(subDirId) = newDir @@ -89,7 +89,7 @@ private[spark] class TachyonBlockManager( } } } -val filePath = subDir + / + filename +val filePath = new TachyonURI(s$subDir/$filename) if(!client.exist(filePath)) { client.createFile(filePath) } @@ -101,7 +101,7 @@ private[spark] class TachyonBlockManager( // TODO: Some of the logic here could be consolidated/de-duplicated with that in the DiskStore. private def createTachyonDirs(): Array[TachyonFile] = { -logDebug(Creating tachyon directories at root dirs ' + rootDirs + ') +logDebug(sCreating tachyon directories at root dirs '$rootDirs') val dateFormat = new
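The user-visible part of the upgrade is that tachyon-client 0.6.x takes `TachyonURI` arguments where 0.5.0 took plain path strings. A condensed sketch of that pattern, simplified from the `TachyonBlockManager` changes above (not a drop-in replacement):

```scala
import tachyon.TachyonURI
import tachyon.client.{TachyonFile, TachyonFS}

// Paths are wrapped in TachyonURI instead of being passed as concatenated strings.
object TachyonPathSketch {
  def connect(master: String): TachyonFS = TachyonFS.get(new TachyonURI(master))

  def getOrCreateFile(client: TachyonFS, dir: String, filename: String): TachyonFile = {
    val filePath = new TachyonURI(s"$dir/$filename") // was: dir + "/" + filename
    if (!client.exist(filePath)) {
      client.createFile(filePath)
    }
    client.getFile(filePath)
  }
}
```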
spark git commit: [SPARK-4985] [SQL] parquet support for date type
Repository: spark Updated Branches: refs/heads/branch-1.3 857e8a60e -> 60b9b96b2

[SPARK-4985] [SQL] parquet support for date type

This PR might have some issues with #3732, and it would have merge conflicts with #3820, so the review can be delayed until those two are merged.

Author: Daoyuan Wang daoyuan.w...@intel.com

Closes #3822 from adrian-wang/parquetdate and squashes the following commits:

2c5d54d [Daoyuan Wang] add a test case
faef887 [Daoyuan Wang] parquet support for primitive date
97e9080 [Daoyuan Wang] parquet support for date type

(cherry picked from commit 4659468f369d69e7f777130e5e3b4c5d47a624f1)
Signed-off-by: Cheng Lian l...@databricks.com

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/60b9b96b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/60b9b96b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/60b9b96b

Branch: refs/heads/branch-1.3
Commit: 60b9b96b227ff1821530e29210cc0c338ce97528
Parents: 857e8a6
Author: Daoyuan Wang daoyuan.w...@intel.com
Authored: Mon Mar 23 11:46:16 2015 +0800
Committer: Cheng Lian l...@databricks.com
Committed: Mon Mar 23 11:46:36 2015 +0800

--
 .../apache/spark/sql/parquet/ParquetConverter.scala  | 12
 .../spark/sql/parquet/ParquetTableSupport.scala      |  2 ++
 .../org/apache/spark/sql/parquet/ParquetTypes.scala  |  4
 .../apache/spark/sql/parquet/ParquetIOSuite.scala    | 15 +++
 .../spark/sql/parquet/ParquetSchemaSuite.scala       |  3 ++-
 5 files changed, 35 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/60b9b96b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala
index f898e4b..43ca359 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala
@@ -127,6 +127,12 @@ private[sql] object CatalystConverter {
           parent.updateByte(fieldIndex, value.asInstanceOf[ByteType.JvmType])
       }
     }
+    case DateType => {
+      new CatalystPrimitiveConverter(parent, fieldIndex) {
+        override def addInt(value: Int): Unit =
+          parent.updateDate(fieldIndex, value.asInstanceOf[DateType.JvmType])
+      }
+    }
     case d: DecimalType => {
       new CatalystPrimitiveConverter(parent, fieldIndex) {
         override def addBinary(value: Binary): Unit =
@@ -192,6 +198,9 @@ private[parquet] abstract class CatalystConverter extends GroupConverter {
   protected[parquet] def updateInt(fieldIndex: Int, value: Int): Unit =
     updateField(fieldIndex, value)

+  protected[parquet] def updateDate(fieldIndex: Int, value: Int): Unit =
+    updateField(fieldIndex, value)
+
   protected[parquet] def updateLong(fieldIndex: Int, value: Long): Unit =
     updateField(fieldIndex, value)

@@ -388,6 +397,9 @@ private[parquet] class CatalystPrimitiveRowConverter(
   override protected[parquet] def updateInt(fieldIndex: Int, value: Int): Unit =
     current.setInt(fieldIndex, value)

+  override protected[parquet] def updateDate(fieldIndex: Int, value: Int): Unit =
+    current.update(fieldIndex, value)
+
   override protected[parquet] def updateLong(fieldIndex: Int, value: Long): Unit =
     current.setLong(fieldIndex, value)

http://git-wip-us.apache.org/repos/asf/spark/blob/60b9b96b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala
index 19bfba3..5a1b154 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala
@@ -212,6 +212,7 @@ private[parquet] class RowWriteSupport extends WriteSupport[Row] with Logging {
     case DoubleType => writer.addDouble(value.asInstanceOf[Double])
     case FloatType => writer.addFloat(value.asInstanceOf[Float])
     case BooleanType => writer.addBoolean(value.asInstanceOf[Boolean])
+    case DateType => writer.addInteger(value.asInstanceOf[Int])
     case d: DecimalType =>
       if (d.precisionInfo == None || d.precisionInfo.get.precision > 18) {
         sys.error(s"Unsupported datatype $d, cannot write to consumer")
@@ -358,6 +359,7 @@ private[parquet] class MutableRowWriteSupport extends RowWriteSupport {
     case
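The patch carries DateType values through Parquet as plain Ints: the write path emits them with addInteger and the read path receives them through addInt/updateDate. A minimal standalone sketch of that encoding, assuming the Int is a day count since the Unix epoch; the helper names below are illustrative and not part of the Spark code base:

  import java.time.LocalDate
  import java.time.temporal.ChronoUnit

  // Illustrative only: the Int that would land in a Parquet INT32 column for a date,
  // under the assumption that dates are stored as "days since 1970-01-01".
  object DateAsInt {
    private val Epoch = LocalDate.of(1970, 1, 1)

    // Encode a date as its day offset from the epoch.
    def toDays(date: LocalDate): Int = ChronoUnit.DAYS.between(Epoch, date).toInt

    // Decode the stored Int back into a date.
    def fromDays(days: Int): LocalDate = LocalDate.ofEpochDay(days.toLong)

    def main(args: Array[String]): Unit = {
      val days = toDays(LocalDate.of(2015, 3, 23))
      println(s"$days -> ${fromDays(days)}")  // 16517 -> 2015-03-23
    }
  }

Because the column is a plain INT32 on disk, the converter only has to forward the integer to the row (updateDate/setInt); no extra decoding happens inside the Parquet layer.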
spark git commit: [SPARK-6397][SQL] Check the missingInput simply
Repository: spark Updated Branches: refs/heads/master 4659468f3 -> e566fe598

[SPARK-6397][SQL] Check the missingInput simply

Author: q00251598 qiyad...@huawei.com

Closes #5082 from watermen/sql-missingInput and squashes the following commits:

25766b9 [q00251598] Check the missingInput simply

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e566fe59
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e566fe59
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e566fe59

Branch: refs/heads/master
Commit: e566fe5982bac5d24e6be76e5d7d6270544a85e6
Parents: 4659468
Author: q00251598 qiyad...@huawei.com
Authored: Mon Mar 23 12:06:13 2015 +0800
Committer: Cheng Lian l...@databricks.com
Committed: Mon Mar 23 12:06:13 2015 +0800

--
 .../org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala   | 5 ++---
 .../spark/sql/catalyst/plans/logical/basicOperators.scala        | 2 ++
 2 files changed, 4 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/e566fe59/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 4e8fc89..fb975ee 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -85,9 +85,8 @@ class CheckAnalysis {

             cleaned.foreach(checkValidAggregateExpression)

-          case o if o.children.nonEmpty &&
-            !o.references.filter(_.name != "grouping__id").subsetOf(o.inputSet) =>
-            val missingAttributes = (o.references -- o.inputSet).map(_.prettyString).mkString(",")
+          case o if o.children.nonEmpty && o.missingInput.nonEmpty =>
+            val missingAttributes = o.missingInput.map(_.prettyString).mkString(",")
             val input = o.inputSet.map(_.prettyString).mkString(",")

             failAnalysis(s"resolved attributes $missingAttributes missing from $input")

http://git-wip-us.apache.org/repos/asf/spark/blob/e566fe59/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
index 384fe53..a94b2d2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
@@ -191,6 +191,8 @@ case class Expand(
     val sizeInBytes = child.statistics.sizeInBytes * projections.length
     Statistics(sizeInBytes = sizeInBytes)
   }
+
+  override def missingInput = super.missingInput.filter(_.name != VirtualColumn.groupingIdName)
 }

 trait GroupingAnalytics extends UnaryNode {
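The new guard, o.children.nonEmpty && o.missingInput.nonEmpty, replaces the hand-rolled subset test: an operator's missingInput is essentially the attributes it references minus the attributes its children provide, with Expand now excluding the internal grouping__id column itself rather than CheckAnalysis filtering it by name. A rough sketch of that set arithmetic using plain attribute-name sets; this is illustrative only and not the real Catalyst AttributeSet API:

  // Illustrative only: models missingInput as ordinary Scala sets of attribute names.
  object MissingInputSketch {
    def missingInput(references: Set[String], inputSet: Set[String]): Set[String] =
      references -- inputSet

    def main(args: Array[String]): Unit = {
      val references = Set("a", "b", "grouping__id")
      val inputSet   = Set("a")

      // An Expand-like operator would drop grouping__id from its own missingInput,
      // so the analyzer no longer needs a special case for that name.
      val missing = missingInput(references, inputSet).filterNot(_ == "grouping__id")

      // Analysis fails only when something is genuinely missing.
      if (missing.nonEmpty)
        println(s"resolved attributes ${missing.mkString(",")} missing from ${inputSet.mkString(",")}")
    }
  }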
spark git commit: Revert [SPARK-6397][SQL] Check the missingInput simply
Repository: spark Updated Branches: refs/heads/master e566fe598 -> bf044def4

Revert "[SPARK-6397][SQL] Check the missingInput simply"

This reverts commit e566fe5982bac5d24e6be76e5d7d6270544a85e6.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bf044def
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bf044def
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bf044def

Branch: refs/heads/master
Commit: bf044def4c3a37a0fd4d5e70c2d57685cfd9fd71
Parents: e566fe5
Author: Cheng Lian l...@databricks.com
Authored: Mon Mar 23 12:15:19 2015 +0800
Committer: Cheng Lian l...@databricks.com
Committed: Mon Mar 23 12:15:19 2015 +0800

--
 .../org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala   | 5 +++--
 .../spark/sql/catalyst/plans/logical/basicOperators.scala        | 2 --
 2 files changed, 3 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/bf044def/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index fb975ee..4e8fc89 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -85,8 +85,9 @@ class CheckAnalysis {

             cleaned.foreach(checkValidAggregateExpression)

-          case o if o.children.nonEmpty && o.missingInput.nonEmpty =>
-            val missingAttributes = o.missingInput.map(_.prettyString).mkString(",")
+          case o if o.children.nonEmpty &&
+            !o.references.filter(_.name != "grouping__id").subsetOf(o.inputSet) =>
+            val missingAttributes = (o.references -- o.inputSet).map(_.prettyString).mkString(",")
             val input = o.inputSet.map(_.prettyString).mkString(",")

             failAnalysis(s"resolved attributes $missingAttributes missing from $input")

http://git-wip-us.apache.org/repos/asf/spark/blob/bf044def/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
index a94b2d2..384fe53 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
@@ -191,8 +191,6 @@ case class Expand(
     val sizeInBytes = child.statistics.sizeInBytes * projections.length
     Statistics(sizeInBytes = sizeInBytes)
   }
-
-  override def missingInput = super.missingInput.filter(_.name != VirtualColumn.groupingIdName)
 }

 trait GroupingAnalytics extends UnaryNode {