git commit: correct tiny comment error
Repository: spark Updated Branches: refs/heads/master cf989601d - 9ecc40d3a correct tiny comment error Author: Chen Chao crazy...@gmail.com Closes #928 from CrazyJvm/patch-8 and squashes the following commits: 144328b [Chen Chao] correct tiny comment error Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9ecc40d3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9ecc40d3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9ecc40d3 Branch: refs/heads/master Commit: 9ecc40d3aeff0eb113f16df55f4249d8143f37f1 Parents: cf98960 Author: Chen Chao crazy...@gmail.com Authored: Sat May 31 00:06:49 2014 -0700 Committer: Reynold Xin r...@apache.org Committed: Sat May 31 00:06:49 2014 -0700 -- core/src/main/scala/org/apache/spark/SparkContext.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9ecc40d3/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 03ceff8..d941aea 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -76,8 +76,8 @@ class SparkContext(config: SparkConf) extends Logging { * :: DeveloperApi :: * Alternative constructor for setting preferred locations where Spark will create executors. * - * @param preferredNodeLocationData used in YARN mode to select nodes to launch containers on. Ca - * be generated using [[org.apache.spark.scheduler.InputFormatInfo.computePreferredLocations]] + * @param preferredNodeLocationData used in YARN mode to select nodes to launch containers on. + * Can be generated using [[org.apache.spark.scheduler.InputFormatInfo.computePreferredLocations]] * from a list of input files or InputFormats for the application. */ @DeveloperApi
git commit: Optionally include Hive as a dependency of the REPL.
Repository: spark Updated Branches: refs/heads/master 3ce81494c - 7463cd248 Optionally include Hive as a dependency of the REPL. Due to the way spark-shell launches from an assembly jar, I don't think this change will affect anyone who isn't trying to launch the shell directly from sbt. That said, it is kinda nice to be able to launch all things directly from SBT when developing. Author: Michael Armbrust mich...@databricks.com Closes #801 from marmbrus/hiveRepl and squashes the following commits: 9570571 [Michael Armbrust] Optionally include Hive as a dependency of the REPL. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7463cd24 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7463cd24 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7463cd24 Branch: refs/heads/master Commit: 7463cd248f81975bce9ff864002932864bd5b649 Parents: 3ce8149 Author: Michael Armbrust mich...@databricks.com Authored: Sat May 31 12:24:35 2014 -0700 Committer: Aaron Davidson aa...@databricks.com Committed: Sat May 31 12:24:35 2014 -0700 -- project/SparkBuild.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7463cd24/project/SparkBuild.scala -- diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 9833411..64c9441 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -59,8 +59,10 @@ object SparkBuild extends Build { lazy val core = Project(core, file(core), settings = coreSettings) + def replDependencies = Seq[ProjectReference](core, graphx, bagel, mllib, sql) ++ maybeHiveRef + lazy val repl = Project(repl, file(repl), settings = replSettings) -.dependsOn(core, graphx, bagel, mllib, sql) +.dependsOn(replDependencies.map(a = a: sbt.ClasspathDep[sbt.ProjectReference]): _*) lazy val tools = Project(tools, file(tools), settings = toolsSettings) dependsOn(core) dependsOn(streaming)
git commit: [SQL] SPARK-1964 Add timestamp to hive metastore type parser.
Repository: spark Updated Branches: refs/heads/master 7463cd248 - 1a0da0ec5 [SQL] SPARK-1964 Add timestamp to hive metastore type parser. Author: Michael Armbrust mich...@databricks.com Closes #913 from marmbrus/timestampMetastore and squashes the following commits: 8e0154f [Michael Armbrust] Add timestamp to hive metastore type parser. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1a0da0ec Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1a0da0ec Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1a0da0ec Branch: refs/heads/master Commit: 1a0da0ec5799f3226ce238cac03f53fa4f7c9326 Parents: 7463cd2 Author: Michael Armbrust mich...@databricks.com Authored: Sat May 31 12:34:22 2014 -0700 Committer: Reynold Xin r...@apache.org Committed: Sat May 31 12:34:22 2014 -0700 -- .../main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1a0da0ec/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 9f74e03..a91b520 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -190,6 +190,7 @@ object HiveMetastoreTypes extends RegexParsers { binary ^^^ BinaryType | boolean ^^^ BooleanType | decimal ^^^ DecimalType | +timestamp ^^^ TimestampType | varchar\\((\\d+)\\).r ^^^ StringType protected lazy val arrayType: Parser[DataType] =
git commit: [SQL] SPARK-1964 Add timestamp to hive metastore type parser.
Repository: spark Updated Branches: refs/heads/branch-1.0 cefc6404a - 8575d8809 [SQL] SPARK-1964 Add timestamp to hive metastore type parser. Author: Michael Armbrust mich...@databricks.com Closes #913 from marmbrus/timestampMetastore and squashes the following commits: 8e0154f [Michael Armbrust] Add timestamp to hive metastore type parser. (cherry picked from commit 1a0da0ec5799f3226ce238cac03f53fa4f7c9326) Signed-off-by: Reynold Xin r...@apache.org Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8575d880 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8575d880 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8575d880 Branch: refs/heads/branch-1.0 Commit: 8575d8809f9062dc87ca72c57c0de42ec570118f Parents: cefc640 Author: Michael Armbrust mich...@databricks.com Authored: Sat May 31 12:34:22 2014 -0700 Committer: Reynold Xin r...@apache.org Committed: Sat May 31 12:34:31 2014 -0700 -- .../main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8575d880/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 9f74e03..a91b520 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -190,6 +190,7 @@ object HiveMetastoreTypes extends RegexParsers { binary ^^^ BinaryType | boolean ^^^ BooleanType | decimal ^^^ DecimalType | +timestamp ^^^ TimestampType | varchar\\((\\d+)\\).r ^^^ StringType protected lazy val arrayType: Parser[DataType] =
git commit: Super minor: Close inputStream in SparkSubmitArguments
Repository: spark Updated Branches: refs/heads/master 1a0da0ec5 - 7d52777ef Super minor: Close inputStream in SparkSubmitArguments `Properties#load()` doesn't close the InputStream, but it'd be closed after being GC'd anyway... Also changed file.getName to file, because getName only shows the filename. This will show the full (possibly relative) path, which is less confusing if it's not found. Author: Aaron Davidson aa...@databricks.com Closes #914 from aarondav/tiny and squashes the following commits: db9d072 [Aaron Davidson] Super minor: Close inputStream in SparkSubmitArguments Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7d52777e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7d52777e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7d52777e Branch: refs/heads/master Commit: 7d52777effd0ff41aed545f53d2ab8de2364a188 Parents: 1a0da0e Author: Aaron Davidson aa...@databricks.com Authored: Sat May 31 12:36:58 2014 -0700 Committer: Reynold Xin r...@apache.org Committed: Sat May 31 12:36:58 2014 -0700 -- .../org/apache/spark/deploy/SparkSubmitArguments.scala | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7d52777e/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index bf449af..153eee3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -381,16 +381,19 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) { object SparkSubmitArguments { /** Load properties present in the given file. 
*/ def getPropertiesFromFile(file: File): Seq[(String, String)] = { -require(file.exists(), s"Properties file ${file.getName} does not exist") +require(file.exists(), s"Properties file $file does not exist") +require(file.isFile(), s"Properties file $file is not a normal file") val inputStream = new FileInputStream(file) -val properties = new Properties() try { + val properties = new Properties() properties.load(inputStream) + properties.stringPropertyNames().toSeq.map(k => (k, properties(k).trim)) } catch { case e: IOException => -val message = s"Failed when loading Spark properties file ${file.getName}" +val message = s"Failed when loading Spark properties file $file" throw new SparkException(message, e) +} finally { + inputStream.close() } -properties.stringPropertyNames().toSeq.map(k => (k, properties(k).trim)) } }
git commit: Super minor: Close inputStream in SparkSubmitArguments
Repository: spark Updated Branches: refs/heads/branch-1.0 8575d8809 - 5ef94ebd1 Super minor: Close inputStream in SparkSubmitArguments `Properties#load()` doesn't close the InputStream, but it'd be closed after being GC'd anyway... Also changed file.getName to file, because getName only shows the filename. This will show the full (possibly relative) path, which is less confusing if it's not found. Author: Aaron Davidson aa...@databricks.com Closes #914 from aarondav/tiny and squashes the following commits: db9d072 [Aaron Davidson] Super minor: Close inputStream in SparkSubmitArguments (cherry picked from commit 7d52777effd0ff41aed545f53d2ab8de2364a188) Signed-off-by: Reynold Xin r...@apache.org Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5ef94ebd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5ef94ebd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5ef94ebd Branch: refs/heads/branch-1.0 Commit: 5ef94ebd1407acdd0d80450a1101ae9a6fe1a636 Parents: 8575d88 Author: Aaron Davidson aa...@databricks.com Authored: Sat May 31 12:36:58 2014 -0700 Committer: Reynold Xin r...@apache.org Committed: Sat May 31 12:37:57 2014 -0700 -- .../org/apache/spark/deploy/SparkSubmitArguments.scala | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5ef94ebd/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index bf449af..153eee3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -381,16 +381,19 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) { object SparkSubmitArguments { /** Load properties present in the given file. 
*/ def getPropertiesFromFile(file: File): Seq[(String, String)] = { -require(file.exists(), s"Properties file ${file.getName} does not exist") +require(file.exists(), s"Properties file $file does not exist") +require(file.isFile(), s"Properties file $file is not a normal file") val inputStream = new FileInputStream(file) -val properties = new Properties() try { + val properties = new Properties() properties.load(inputStream) + properties.stringPropertyNames().toSeq.map(k => (k, properties(k).trim)) } catch { case e: IOException => -val message = s"Failed when loading Spark properties file ${file.getName}" +val message = s"Failed when loading Spark properties file $file" throw new SparkException(message, e) +} finally { + inputStream.close() } -properties.stringPropertyNames().toSeq.map(k => (k, properties(k).trim)) } }
git commit: SPARK-1917: fix PySpark import of scipy.special functions
Repository: spark Updated Branches: refs/heads/master d8c005d53 - 5e98967b6 SPARK-1917: fix PySpark import of scipy.special functions https://issues.apache.org/jira/browse/SPARK-1917 Author: Uri Laserson laser...@cloudera.com Closes #866 from laserson/SPARK-1917 and squashes the following commits: d947e8c [Uri Laserson] Added test for scipy.special importing 1798bbd [Uri Laserson] SPARK-1917: fix PySpark import of scipy.special Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5e98967b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5e98967b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5e98967b Branch: refs/heads/master Commit: 5e98967b612ccf026cb1cc5ff3ac8bf72d7e836e Parents: d8c005d Author: Uri Laserson laser...@cloudera.com Authored: Sat May 31 14:59:09 2014 -0700 Committer: Matei Zaharia ma...@databricks.com Committed: Sat May 31 14:59:09 2014 -0700 -- python/pyspark/cloudpickle.py | 2 +- python/pyspark/tests.py | 24 2 files changed, 25 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5e98967b/python/pyspark/cloudpickle.py -- diff --git a/python/pyspark/cloudpickle.py b/python/pyspark/cloudpickle.py index 6a7c23a..eb5dbb8 100644 --- a/python/pyspark/cloudpickle.py +++ b/python/pyspark/cloudpickle.py @@ -933,7 +933,7 @@ def _change_cell_value(cell, newval): Note: These can never be renamed due to client compatibility issues def _getobject(modname, attribute): -mod = __import__(modname) +mod = __import__(modname, fromlist=[attribute]) return mod.__dict__[attribute] def _generateImage(size, mode, str_rep): http://git-wip-us.apache.org/repos/asf/spark/blob/5e98967b/python/pyspark/tests.py -- diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 64f2eeb..ed90915 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -35,6 +35,14 @@ from pyspark.context import SparkContext from pyspark.files import SparkFiles from 
pyspark.serializers import read_int +_have_scipy = False +try: +import scipy.sparse +_have_scipy = True +except: +# No SciPy, but that's okay, we'll skip those tests +pass + SPARK_HOME = os.environ[SPARK_HOME] @@ -359,5 +367,21 @@ class TestSparkSubmit(unittest.TestCase): self.assertIn([2, 4, 6], out) +@unittest.skipIf(not _have_scipy, SciPy not installed) +class SciPyTests(PySparkTestCase): +General PySpark tests that depend on scipy + +def test_serialize(self): +from scipy.special import gammaln +x = range(1, 5) +expected = map(gammaln, x) +observed = self.sc.parallelize(x).map(gammaln).collect() +self.assertEqual(expected, observed) + + if __name__ == __main__: +if not _have_scipy: +print NOTE: Skipping SciPy tests as it does not seem to be installed unittest.main() +if not _have_scipy: +print NOTE: SciPy tests were skipped as it does not seem to be installed