git commit: correct tiny comment error

2014-05-31 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master cf989601d -> 9ecc40d3a


correct tiny comment error

Author: Chen Chao crazy...@gmail.com

Closes #928 from CrazyJvm/patch-8 and squashes the following commits:

144328b [Chen Chao] correct tiny comment error


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9ecc40d3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9ecc40d3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9ecc40d3

Branch: refs/heads/master
Commit: 9ecc40d3aeff0eb113f16df55f4249d8143f37f1
Parents: cf98960
Author: Chen Chao crazy...@gmail.com
Authored: Sat May 31 00:06:49 2014 -0700
Committer: Reynold Xin r...@apache.org
Committed: Sat May 31 00:06:49 2014 -0700

--
 core/src/main/scala/org/apache/spark/SparkContext.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9ecc40d3/core/src/main/scala/org/apache/spark/SparkContext.scala
--
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 03ceff8..d941aea 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -76,8 +76,8 @@ class SparkContext(config: SparkConf) extends Logging {
    * :: DeveloperApi ::
    * Alternative constructor for setting preferred locations where Spark will create executors.
    *
-   * @param preferredNodeLocationData used in YARN mode to select nodes to launch containers on. Ca
-   * be generated using [[org.apache.spark.scheduler.InputFormatInfo.computePreferredLocations]]
+   * @param preferredNodeLocationData used in YARN mode to select nodes to launch containers on.
+   * Can be generated using [[org.apache.spark.scheduler.InputFormatInfo.computePreferredLocations]]
    * from a list of input files or InputFormats for the application.
    */
   @DeveloperApi
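
For readers of the Javadoc above, a hedged usage sketch of this alternative constructor in YARN mode. The app name, input path, and input format are illustrative assumptions, and exact collection types returned by computePreferredLocations may differ by Spark version:

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapred.TextInputFormat
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.scheduler.InputFormatInfo

// Hypothetical example: describe the application's inputs so YARN can
// launch containers close to the data blocks.
val conf = new SparkConf().setAppName("yarn-app")
val inputs = Seq(
  new InputFormatInfo(new Configuration(), classOf[TextInputFormat], "hdfs:///data/input"))
val preferredLocations = InputFormatInfo.computePreferredLocations(inputs)
val sc = new SparkContext(conf, preferredLocations)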



git commit: Optionally include Hive as a dependency of the REPL.

2014-05-31 Thread adav
Repository: spark
Updated Branches:
  refs/heads/master 3ce81494c -> 7463cd248


Optionally include Hive as a dependency of the REPL.

Due to the way spark-shell launches from an assembly jar, I don't think this 
change will affect anyone who isn't trying to launch the shell directly from 
sbt. That said, it is nice to be able to launch everything directly from sbt 
when developing.
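
For context, a minimal sketch of the glue this patch relies on elsewhere in SparkBuild.scala; the maybeHiveRef definition is outside this diff, so the SPARK_HIVE switch and the hive project wiring below are assumptions, not the actual build code:

import sbt._

object HiveReplSketch {
  // Hypothetical toggle: opt in with e.g. `SPARK_HIVE=true sbt/sbt`.
  val isHiveEnabled: Boolean = sys.env.get("SPARK_HIVE").exists(_.toBoolean)

  lazy val hive = Project("hive", file("sql/hive"))
  // Empty when disabled, so the repl project gains no Hive dependency.
  lazy val maybeHiveRef: Seq[ProjectReference] = if (isHiveEnabled) Seq(hive) else Seq.empty
}

In the diff below, the ascription `a => a: sbt.ClasspathDep[sbt.ProjectReference]` applies sbt's implicit conversion explicitly, since dependsOn takes ClasspathDep varargs rather than bare project references.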

Author: Michael Armbrust mich...@databricks.com

Closes #801 from marmbrus/hiveRepl and squashes the following commits:

9570571 [Michael Armbrust] Optionally include Hive as a dependency of the REPL.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7463cd24
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7463cd24
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7463cd24

Branch: refs/heads/master
Commit: 7463cd248f81975bce9ff864002932864bd5b649
Parents: 3ce8149
Author: Michael Armbrust mich...@databricks.com
Authored: Sat May 31 12:24:35 2014 -0700
Committer: Aaron Davidson aa...@databricks.com
Committed: Sat May 31 12:24:35 2014 -0700

--
 project/SparkBuild.scala | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7463cd24/project/SparkBuild.scala
--
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 9833411..64c9441 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -59,8 +59,10 @@ object SparkBuild extends Build {
 
   lazy val core = Project("core", file("core"), settings = coreSettings)
 
+  def replDependencies = Seq[ProjectReference](core, graphx, bagel, mllib, sql) ++ maybeHiveRef
+
   lazy val repl = Project("repl", file("repl"), settings = replSettings)
-    .dependsOn(core, graphx, bagel, mllib, sql)
+    .dependsOn(replDependencies.map(a => a: sbt.ClasspathDep[sbt.ProjectReference]): _*)
 
   lazy val tools = Project("tools", file("tools"), settings = toolsSettings) dependsOn(core) dependsOn(streaming)
 



git commit: [SQL] SPARK-1964 Add timestamp to hive metastore type parser.

2014-05-31 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master 7463cd248 -> 1a0da0ec5


[SQL] SPARK-1964 Add timestamp to hive metastore type parser.

Author: Michael Armbrust mich...@databricks.com

Closes #913 from marmbrus/timestampMetastore and squashes the following commits:

8e0154f [Michael Armbrust] Add timestamp to hive metastore type parser.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1a0da0ec
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1a0da0ec
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1a0da0ec

Branch: refs/heads/master
Commit: 1a0da0ec5799f3226ce238cac03f53fa4f7c9326
Parents: 7463cd2
Author: Michael Armbrust mich...@databricks.com
Authored: Sat May 31 12:34:22 2014 -0700
Committer: Reynold Xin r...@apache.org
Committed: Sat May 31 12:34:22 2014 -0700

--
 .../main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala | 1 +
 1 file changed, 1 insertion(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1a0da0ec/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
--
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 9f74e03..a91b520 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -190,6 +190,7 @@ object HiveMetastoreTypes extends RegexParsers {
     "binary" ^^^ BinaryType |
     "boolean" ^^^ BooleanType |
     "decimal" ^^^ DecimalType |
+    "timestamp" ^^^ TimestampType |
     "varchar\\((\\d+)\\)".r ^^^ StringType
 
   protected lazy val arrayType: Parser[DataType] =
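
As a hedged illustration (assuming HiveMetastoreTypes exposes the toDataType entry point used elsewhere in this file), the new alternative lets metastore type strings containing timestamp parse cleanly:

import org.apache.spark.sql.hive.HiveMetastoreTypes

// Before this patch, "timestamp" matched none of the primitiveType
// alternatives and parsing failed; now it maps to TimestampType.
val t = HiveMetastoreTypes.toDataType("timestamp")
// Composite parsers reuse primitiveType, so nested uses parse as well.
val m = HiveMetastoreTypes.toDataType("map<string,timestamp>")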



git commit: [SQL] SPARK-1964 Add timestamp to hive metastore type parser.

2014-05-31 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 cefc6404a -> 8575d8809


[SQL] SPARK-1964 Add timestamp to hive metastore type parser.

Author: Michael Armbrust mich...@databricks.com

Closes #913 from marmbrus/timestampMetastore and squashes the following commits:

8e0154f [Michael Armbrust] Add timestamp to hive metastore type parser.

(cherry picked from commit 1a0da0ec5799f3226ce238cac03f53fa4f7c9326)
Signed-off-by: Reynold Xin r...@apache.org


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8575d880
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8575d880
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8575d880

Branch: refs/heads/branch-1.0
Commit: 8575d8809f9062dc87ca72c57c0de42ec570118f
Parents: cefc640
Author: Michael Armbrust mich...@databricks.com
Authored: Sat May 31 12:34:22 2014 -0700
Committer: Reynold Xin r...@apache.org
Committed: Sat May 31 12:34:31 2014 -0700

--
 .../main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala | 1 +
 1 file changed, 1 insertion(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8575d880/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
--
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 9f74e03..a91b520 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -190,6 +190,7 @@ object HiveMetastoreTypes extends RegexParsers {
     "binary" ^^^ BinaryType |
     "boolean" ^^^ BooleanType |
     "decimal" ^^^ DecimalType |
+    "timestamp" ^^^ TimestampType |
     "varchar\\((\\d+)\\)".r ^^^ StringType
 
   protected lazy val arrayType: Parser[DataType] =



git commit: Super minor: Close inputStream in SparkSubmitArguments

2014-05-31 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master 1a0da0ec5 -> 7d52777ef


Super minor: Close inputStream in SparkSubmitArguments

`Properties#load()` doesn't close the InputStream; it would eventually be 
closed once GC'd, but this closes it explicitly in a finally block.

Also changed `file.getName` to `file`, because getName shows only the 
filename; the full (possibly relative) path is less confusing when the file 
is not found.
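
For reference, a hedged sketch of calling the patched helper from Spark-internal code; the conf path is illustrative:

import java.io.File
import org.apache.spark.deploy.SparkSubmitArguments

// A bad path now reports the full (possibly relative) path via require(),
// and the input stream is always closed in the finally block.
val props: Seq[(String, String)] =
  SparkSubmitArguments.getPropertiesFromFile(new File("conf/spark-defaults.conf"))
props.foreach { case (k, v) => println(s"$k=$v") }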

Author: Aaron Davidson aa...@databricks.com

Closes #914 from aarondav/tiny and squashes the following commits:

db9d072 [Aaron Davidson] Super minor: Close inputStream in SparkSubmitArguments


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7d52777e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7d52777e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7d52777e

Branch: refs/heads/master
Commit: 7d52777effd0ff41aed545f53d2ab8de2364a188
Parents: 1a0da0e
Author: Aaron Davidson aa...@databricks.com
Authored: Sat May 31 12:36:58 2014 -0700
Committer: Reynold Xin r...@apache.org
Committed: Sat May 31 12:36:58 2014 -0700

--
 .../org/apache/spark/deploy/SparkSubmitArguments.scala   | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7d52777e/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index bf449af..153eee3 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -381,16 +381,19 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
 object SparkSubmitArguments {
   /** Load properties present in the given file. */
   def getPropertiesFromFile(file: File): Seq[(String, String)] = {
-    require(file.exists(), s"Properties file ${file.getName} does not exist")
+    require(file.exists(), s"Properties file $file does not exist")
+    require(file.isFile(), s"Properties file $file is not a normal file")
     val inputStream = new FileInputStream(file)
-    val properties = new Properties()
     try {
+      val properties = new Properties()
       properties.load(inputStream)
+      properties.stringPropertyNames().toSeq.map(k => (k, properties(k).trim))
     } catch {
       case e: IOException =>
-        val message = s"Failed when loading Spark properties file ${file.getName}"
+        val message = s"Failed when loading Spark properties file $file"
         throw new SparkException(message, e)
+    } finally {
+      inputStream.close()
     }
-    properties.stringPropertyNames().toSeq.map(k => (k, properties(k).trim))
   }
 }



git commit: Super minor: Close inputStream in SparkSubmitArguments

2014-05-31 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 8575d8809 -> 5ef94ebd1


Super minor: Close inputStream in SparkSubmitArguments

`Properties#load()` doesn't close the InputStream; it would eventually be 
closed once GC'd, but this closes it explicitly in a finally block.

Also changed `file.getName` to `file`, because getName shows only the 
filename; the full (possibly relative) path is less confusing when the file 
is not found.

Author: Aaron Davidson aa...@databricks.com

Closes #914 from aarondav/tiny and squashes the following commits:

db9d072 [Aaron Davidson] Super minor: Close inputStream in SparkSubmitArguments

(cherry picked from commit 7d52777effd0ff41aed545f53d2ab8de2364a188)
Signed-off-by: Reynold Xin r...@apache.org


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5ef94ebd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5ef94ebd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5ef94ebd

Branch: refs/heads/branch-1.0
Commit: 5ef94ebd1407acdd0d80450a1101ae9a6fe1a636
Parents: 8575d88
Author: Aaron Davidson aa...@databricks.com
Authored: Sat May 31 12:36:58 2014 -0700
Committer: Reynold Xin r...@apache.org
Committed: Sat May 31 12:37:57 2014 -0700

--
 .../org/apache/spark/deploy/SparkSubmitArguments.scala   | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5ef94ebd/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index bf449af..153eee3 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -381,16 +381,19 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
 object SparkSubmitArguments {
   /** Load properties present in the given file. */
   def getPropertiesFromFile(file: File): Seq[(String, String)] = {
-    require(file.exists(), s"Properties file ${file.getName} does not exist")
+    require(file.exists(), s"Properties file $file does not exist")
+    require(file.isFile(), s"Properties file $file is not a normal file")
     val inputStream = new FileInputStream(file)
-    val properties = new Properties()
     try {
+      val properties = new Properties()
       properties.load(inputStream)
+      properties.stringPropertyNames().toSeq.map(k => (k, properties(k).trim))
     } catch {
       case e: IOException =>
-        val message = s"Failed when loading Spark properties file ${file.getName}"
+        val message = s"Failed when loading Spark properties file $file"
         throw new SparkException(message, e)
+    } finally {
+      inputStream.close()
     }
-    properties.stringPropertyNames().toSeq.map(k => (k, properties(k).trim))
   }
 }



git commit: SPARK-1917: fix PySpark import of scipy.special functions

2014-05-31 Thread matei
Repository: spark
Updated Branches:
  refs/heads/master d8c005d53 -> 5e98967b6


SPARK-1917: fix PySpark import of scipy.special functions

https://issues.apache.org/jira/browse/SPARK-1917
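
Background: Python's `__import__("pkg.sub")` returns the top-level package 
`pkg` unless a non-empty `fromlist` is supplied, so the old `_getobject` 
looked up `scipy.special` attributes on the top-level `scipy` module and 
failed; passing `fromlist=[attribute]` makes `__import__` return the 
submodule itself.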

Author: Uri Laserson laser...@cloudera.com

Closes #866 from laserson/SPARK-1917 and squashes the following commits:

d947e8c [Uri Laserson] Added test for scipy.special importing
1798bbd [Uri Laserson] SPARK-1917: fix PySpark import of scipy.special


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5e98967b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5e98967b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5e98967b

Branch: refs/heads/master
Commit: 5e98967b612ccf026cb1cc5ff3ac8bf72d7e836e
Parents: d8c005d
Author: Uri Laserson laser...@cloudera.com
Authored: Sat May 31 14:59:09 2014 -0700
Committer: Matei Zaharia ma...@databricks.com
Committed: Sat May 31 14:59:09 2014 -0700

--
 python/pyspark/cloudpickle.py |  2 +-
 python/pyspark/tests.py   | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5e98967b/python/pyspark/cloudpickle.py
--
diff --git a/python/pyspark/cloudpickle.py b/python/pyspark/cloudpickle.py
index 6a7c23a..eb5dbb8 100644
--- a/python/pyspark/cloudpickle.py
+++ b/python/pyspark/cloudpickle.py
@@ -933,7 +933,7 @@ def _change_cell_value(cell, newval):
 Note: These can never be renamed due to client compatibility issues"""
 
 def _getobject(modname, attribute):
-    mod = __import__(modname)
+    mod = __import__(modname, fromlist=[attribute])
     return mod.__dict__[attribute]
 
 def _generateImage(size, mode, str_rep):

http://git-wip-us.apache.org/repos/asf/spark/blob/5e98967b/python/pyspark/tests.py
--
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index 64f2eeb..ed90915 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -35,6 +35,14 @@ from pyspark.context import SparkContext
 from pyspark.files import SparkFiles
 from pyspark.serializers import read_int
 
+_have_scipy = False
+try:
+    import scipy.sparse
+    _have_scipy = True
+except:
+    # No SciPy, but that's okay, we'll skip those tests
+    pass
+
 
 SPARK_HOME = os.environ["SPARK_HOME"]
 
@@ -359,5 +367,21 @@ class TestSparkSubmit(unittest.TestCase):
         self.assertIn("[2, 4, 6]", out)
 
 
+@unittest.skipIf(not _have_scipy, "SciPy not installed")
+class SciPyTests(PySparkTestCase):
+    """General PySpark tests that depend on scipy """
+
+    def test_serialize(self):
+        from scipy.special import gammaln
+        x = range(1, 5)
+        expected = map(gammaln, x)
+        observed = self.sc.parallelize(x).map(gammaln).collect()
+        self.assertEqual(expected, observed)
+
+
 if __name__ == "__main__":
+    if not _have_scipy:
+        print "NOTE: Skipping SciPy tests as it does not seem to be installed"
     unittest.main()
+    if not _have_scipy:
+        print "NOTE: SciPy tests were skipped as it does not seem to be installed"