spark git commit: [SPARK-20661][SPARKR][TEST][FOLLOWUP] SparkR tableNames() test fails
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 4179ffc03 -> 54e074349

[SPARK-20661][SPARKR][TEST][FOLLOWUP] SparkR tableNames() test fails

## What changes were proposed in this pull request?

Change it to check for a relative count, as in this test
https://github.com/apache/spark/blame/master/R/pkg/inst/tests/testthat/test_sparkSQL.R#L3355
for the catalog APIs.

## How was this patch tested?

Unit tests; this needs to be combined with another commit with a SQL change to check.

Author: Felix Cheung

Closes #17905 from felixcheung/rtabletests.

(cherry picked from commit b952b44af4d243f1e3ad88bccf4af7d04df3fc81)
Signed-off-by: Felix Cheung

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/54e07434
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/54e07434
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/54e07434

Branch: refs/heads/branch-2.2
Commit: 54e07434968624dbb0fb80773356e614b954e52f
Parents: 4179ffc
Author: Felix Cheung
Authored: Mon May 8 22:49:40 2017 -0700
Committer: Felix Cheung
Committed: Mon May 8 22:49:53 2017 -0700

--
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/54e07434/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 58cd259..ae2969f 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -668,26 +668,27 @@ test_that("jsonRDD() on a RDD with json string", {
 })
 
 test_that("test tableNames and tables", {
-  # Making sure there are no registered temp tables from previous tests
-  suppressWarnings(sapply(tableNames(), function(tname) { dropTempTable(tname) }))
+  count <- count(listTables())
+
   df <- read.json(jsonPath)
   createOrReplaceTempView(df, "table1")
-  expect_equal(length(tableNames()), 1)
-  expect_equal(length(tableNames("default")), 1)
+  expect_equal(length(tableNames()), count + 1)
+  expect_equal(length(tableNames("default")), count + 1)
+
   tables <- listTables()
-  expect_equal(count(tables), 1)
+  expect_equal(count(tables), count + 1)
   expect_equal(count(tables()), count(tables))
   expect_true("tableName" %in% colnames(tables()))
   expect_true(all(c("tableName", "database", "isTemporary") %in% colnames(tables())))
 
   suppressWarnings(registerTempTable(df, "table2"))
   tables <- listTables()
-  expect_equal(count(tables), 2)
+  expect_equal(count(tables), count + 2)
   suppressWarnings(dropTempTable("table1"))
   expect_true(dropTempView("table2"))
   tables <- listTables()
-  expect_equal(count(tables), 0)
+  expect_equal(count(tables), count + 0)
 })
 
 test_that(
spark git commit: [SPARK-20661][SPARKR][TEST][FOLLOWUP] SparkR tableNames() test fails
Repository: spark
Updated Branches:
  refs/heads/master 2abfee18b -> b952b44af

[SPARK-20661][SPARKR][TEST][FOLLOWUP] SparkR tableNames() test fails

## What changes were proposed in this pull request?

Change it to check for a relative count, as in this test
https://github.com/apache/spark/blame/master/R/pkg/inst/tests/testthat/test_sparkSQL.R#L3355
for the catalog APIs.

## How was this patch tested?

Unit tests; this needs to be combined with another commit with a SQL change to check.

Author: Felix Cheung

Closes #17905 from felixcheung/rtabletests.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b952b44a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b952b44a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b952b44a

Branch: refs/heads/master
Commit: b952b44af4d243f1e3ad88bccf4af7d04df3fc81
Parents: 2abfee1
Author: Felix Cheung
Authored: Mon May 8 22:49:40 2017 -0700
Committer: Felix Cheung
Committed: Mon May 8 22:49:40 2017 -0700

--
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/b952b44a/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index ab6888e..19aa61e 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -677,26 +677,27 @@ test_that("jsonRDD() on a RDD with json string", {
 })
 
 test_that("test tableNames and tables", {
-  # Making sure there are no registered temp tables from previous tests
-  suppressWarnings(sapply(tableNames(), function(tname) { dropTempTable(tname) }))
+  count <- count(listTables())
+
   df <- read.json(jsonPath)
   createOrReplaceTempView(df, "table1")
-  expect_equal(length(tableNames()), 1)
-  expect_equal(length(tableNames("default")), 1)
+  expect_equal(length(tableNames()), count + 1)
+  expect_equal(length(tableNames("default")), count + 1)
+
   tables <- listTables()
-  expect_equal(count(tables), 1)
+  expect_equal(count(tables), count + 1)
   expect_equal(count(tables()), count(tables))
   expect_true("tableName" %in% colnames(tables()))
   expect_true(all(c("tableName", "database", "isTemporary") %in% colnames(tables())))
 
   suppressWarnings(registerTempTable(df, "table2"))
   tables <- listTables()
-  expect_equal(count(tables), 2)
+  expect_equal(count(tables), count + 2)
   suppressWarnings(dropTempTable("table1"))
   expect_true(dropTempView("table2"))
   tables <- listTables()
-  expect_equal(count(tables), 0)
+  expect_equal(count(tables), count + 0)
 })
 
 test_that(
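The same relative-count idea, sketched below in Scala against the public Catalog API for readers more familiar with the Scala side. This is an illustrative sketch only, not part of the patch; the object name, app name, and view name are invented for the example, and it assumes a local SparkSession.

```scala
import org.apache.spark.sql.SparkSession

object RelativeCountExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("relative-count").getOrCreate()
    import spark.implicits._

    // Snapshot whatever tables/views earlier code left behind instead of assuming an empty catalog.
    val before = spark.catalog.listTables().count()

    val df = Seq((1, "a"), (2, "b")).toDF("id", "value")
    df.createOrReplaceTempView("table1")

    // Assert relative to the snapshot, so the check is robust to pre-existing tables or views.
    assert(spark.catalog.listTables().count() == before + 1)

    spark.catalog.dropTempView("table1")
    assert(spark.catalog.listTables().count() == before)

    spark.stop()
  }
}
```

Checking against a snapshot rather than an absolute count is what makes the test independent of whatever other suites created beforehand.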
spark git commit: [SPARK-20661][SPARKR][TEST] SparkR tableNames() test fails
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 23681e9ca -> 4179ffc03

[SPARK-20661][SPARKR][TEST] SparkR tableNames() test fails

## What changes were proposed in this pull request?

Cleaning existing temp tables before running tableNames tests

## How was this patch tested?

SparkR Unit tests

Author: Hossein

Closes #17903 from falaki/SPARK-20661.

(cherry picked from commit 2abfee18b6511482b916c36f00bf3abf68a59e19)
Signed-off-by: Yin Huai

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4179ffc0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4179ffc0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4179ffc0

Branch: refs/heads/branch-2.2
Commit: 4179ffc031a0dbca6a93255c673de800ce7393fe
Parents: 23681e9
Author: Hossein
Authored: Mon May 8 14:48:11 2017 -0700
Committer: Yin Huai
Committed: Mon May 8 14:48:29 2017 -0700

--
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 2 ++
 1 file changed, 2 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/4179ffc0/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 3f445e2..58cd259 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -668,6 +668,8 @@ test_that("jsonRDD() on a RDD with json string", {
 })
 
 test_that("test tableNames and tables", {
+  # Making sure there are no registered temp tables from previous tests
+  suppressWarnings(sapply(tableNames(), function(tname) { dropTempTable(tname) }))
   df <- read.json(jsonPath)
   createOrReplaceTempView(df, "table1")
   expect_equal(length(tableNames()), 1)
spark git commit: [SPARK-20661][SPARKR][TEST] SparkR tableNames() test fails
Repository: spark
Updated Branches:
  refs/heads/master 829cd7b8b -> 2abfee18b

[SPARK-20661][SPARKR][TEST] SparkR tableNames() test fails

## What changes were proposed in this pull request?

Cleaning existing temp tables before running tableNames tests

## How was this patch tested?

SparkR Unit tests

Author: Hossein

Closes #17903 from falaki/SPARK-20661.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2abfee18
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2abfee18
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2abfee18

Branch: refs/heads/master
Commit: 2abfee18b6511482b916c36f00bf3abf68a59e19
Parents: 829cd7b
Author: Hossein
Authored: Mon May 8 14:48:11 2017 -0700
Committer: Yin Huai
Committed: Mon May 8 14:48:11 2017 -0700

--
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 2 ++
 1 file changed, 2 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/2abfee18/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index f517ce6..ab6888e 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -677,6 +677,8 @@ test_that("jsonRDD() on a RDD with json string", {
 })
 
 test_that("test tableNames and tables", {
+  # Making sure there are no registered temp tables from previous tests
+  suppressWarnings(sapply(tableNames(), function(tname) { dropTempTable(tname) }))
   df <- read.json(jsonPath)
   createOrReplaceTempView(df, "table1")
   expect_equal(length(tableNames()), 1)
spark git commit: [SPARK-20605][CORE][YARN][MESOS] Deprecate not used AM and executor port configuration
Repository: spark Updated Branches: refs/heads/master aeb2ecc0c -> 829cd7b8b [SPARK-20605][CORE][YARN][MESOS] Deprecate not used AM and executor port configuration ## What changes were proposed in this pull request? After SPARK-10997, client mode Netty RpcEnv doesn't require to start server, so port configurations are not used any more, here propose to remove these two configurations: "spark.executor.port" and "spark.am.port". ## How was this patch tested? Existing UTs. Author: jerryshaoCloses #17866 from jerryshao/SPARK-20605. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/829cd7b8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/829cd7b8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/829cd7b8 Branch: refs/heads/master Commit: 829cd7b8b70e65a91aa66e6d626bd45f18e0ad97 Parents: aeb2ecc Author: jerryshao Authored: Mon May 8 14:27:56 2017 -0700 Committer: Marcelo Vanzin Committed: Mon May 8 14:27:56 2017 -0700 -- .../main/scala/org/apache/spark/SparkConf.scala | 4 ++- .../main/scala/org/apache/spark/SparkEnv.scala | 14 +++- .../executor/CoarseGrainedExecutorBackend.scala | 5 ++- docs/running-on-mesos.md| 2 +- docs/running-on-yarn.md | 7 .../spark/executor/MesosExecutorBackend.scala | 3 +- .../cluster/mesos/MesosSchedulerUtils.scala | 2 +- .../mesos/MesosSchedulerUtilsSuite.scala| 34 ++-- .../spark/deploy/yarn/ApplicationMaster.scala | 3 +- .../org/apache/spark/deploy/yarn/config.scala | 5 --- 10 files changed, 22 insertions(+), 57 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/829cd7b8/core/src/main/scala/org/apache/spark/SparkConf.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index 2a2ce05..956724b 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -579,7 +579,9 @@ private[spark] object SparkConf extends Logging { "are no longer accepted. 
To specify the equivalent now, one may use '64k'."), DeprecatedConfig("spark.rpc", "2.0", "Not used any more."), DeprecatedConfig("spark.scheduler.executorTaskBlacklistTime", "2.1.0", -"Please use the new blacklisting options, spark.blacklist.*") +"Please use the new blacklisting options, spark.blacklist.*"), + DeprecatedConfig("spark.yarn.am.port", "2.0.0", "Not used any more"), + DeprecatedConfig("spark.executor.port", "2.0.0", "Not used any more") ) Map(configs.map { cfg => (cfg.key -> cfg) } : _*) http://git-wip-us.apache.org/repos/asf/spark/blob/829cd7b8/core/src/main/scala/org/apache/spark/SparkEnv.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index f4a59f0..3196c1e 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -177,7 +177,7 @@ object SparkEnv extends Logging { SparkContext.DRIVER_IDENTIFIER, bindAddress, advertiseAddress, - port, + Option(port), isLocal, numCores, ioEncryptionKey, @@ -194,7 +194,6 @@ object SparkEnv extends Logging { conf: SparkConf, executorId: String, hostname: String, - port: Int, numCores: Int, ioEncryptionKey: Option[Array[Byte]], isLocal: Boolean): SparkEnv = { @@ -203,7 +202,7 @@ object SparkEnv extends Logging { executorId, hostname, hostname, - port, + None, isLocal, numCores, ioEncryptionKey @@ -220,7 +219,7 @@ object SparkEnv extends Logging { executorId: String, bindAddress: String, advertiseAddress: String, - port: Int, + port: Option[Int], isLocal: Boolean, numUsableCores: Int, ioEncryptionKey: Option[Array[Byte]], @@ -243,17 +242,12 @@ object SparkEnv extends Logging { } val systemName = if (isDriver) driverSystemName else executorSystemName -val rpcEnv = RpcEnv.create(systemName, bindAddress, advertiseAddress, port, conf, +val rpcEnv = RpcEnv.create(systemName, bindAddress, advertiseAddress, port.getOrElse(-1), conf, securityManager, clientMode = !isDriver) // Figure out which port RpcEnv actually bound to in case the original port is 0 or occupied. -// In the non-driver case, the RPC env's address may be null since it may not be listening -
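For illustration, a minimal sketch (not from the patch) of what the deprecation in the commit above means for user code: once "spark.executor.port" and "spark.yarn.am.port" are registered as deprecated, setting them on a SparkConf should only produce a deprecation warning, since client-mode RpcEnv no longer binds to a configured port. The object name, app name, and port values below are invented for the example.

```scala
import org.apache.spark.SparkConf

object DeprecatedPortConfExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("deprecated-port-conf")
      // Both keys below are now only recognised for the sake of a deprecation warning;
      // nothing in executor or AM startup consumes the values any more.
      .set("spark.executor.port", "16000")
      .set("spark.yarn.am.port", "16001")

    // The raw values remain readable on the conf, but they no longer affect port selection.
    println(conf.get("spark.executor.port", "<unset>"))
  }
}
```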
spark git commit: [SPARK-20621][DEPLOY] Delete deprecated config parameter in 'spark-env.sh'
Repository: spark Updated Branches: refs/heads/branch-2.2 7b9d05ad0 -> 23681e9ca [SPARK-20621][DEPLOY] Delete deprecated config parameter in 'spark-env.sh' ## What changes were proposed in this pull request? Currently, `spark.executor.instances` is deprecated in `spark-env.sh`, because we suggest config it in `spark-defaults.conf` or other config file. And also this parameter is useless even if you set it in `spark-env.sh`, so remove it in this patch. ## How was this patch tested? Existing tests. Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Xianyang LiuCloses #17881 from ConeyLiu/deprecatedParam. (cherry picked from commit aeb2ecc0cd898f5352df0a04be1014b02ea3e20e) Signed-off-by: Marcelo Vanzin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/23681e9c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/23681e9c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/23681e9c Branch: refs/heads/branch-2.2 Commit: 23681e9ca0042328f93962701d19ca371727b0b7 Parents: 7b9d05a Author: Xianyang Liu Authored: Mon May 8 10:25:24 2017 -0700 Committer: Marcelo Vanzin Committed: Mon May 8 10:25:35 2017 -0700 -- conf/spark-env.sh.template | 1 - .../org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala | 5 + 2 files changed, 1 insertion(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/23681e9c/conf/spark-env.sh.template -- diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template index 94bd2c4..b7c985a 100755 --- a/conf/spark-env.sh.template +++ b/conf/spark-env.sh.template @@ -34,7 +34,6 @@ # Options read in YARN client mode # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files -# - SPARK_EXECUTOR_INSTANCES, Number of executors to start (Default: 2) # - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1). # - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G) # - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G) http://git-wip-us.apache.org/repos/asf/spark/blob/23681e9c/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala -- diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala index 9357885..0fc994d 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -280,10 +280,7 @@ object YarnSparkHadoopUtil { initialNumExecutors } else { - val targetNumExecutors = - sys.env.get("SPARK_EXECUTOR_INSTANCES").map(_.toInt).getOrElse(numExecutors) - // System property can override environment variable. - conf.get(EXECUTOR_INSTANCES).getOrElse(targetNumExecutors) + conf.get(EXECUTOR_INSTANCES).getOrElse(numExecutors) } } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20621][DEPLOY] Delete deprecated config parameter in 'spark-env.sh'
Repository: spark Updated Branches: refs/heads/master 58518d070 -> aeb2ecc0c [SPARK-20621][DEPLOY] Delete deprecated config parameter in 'spark-env.sh' ## What changes were proposed in this pull request? Currently, `spark.executor.instances` is deprecated in `spark-env.sh`, because we suggest config it in `spark-defaults.conf` or other config file. And also this parameter is useless even if you set it in `spark-env.sh`, so remove it in this patch. ## How was this patch tested? Existing tests. Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Xianyang LiuCloses #17881 from ConeyLiu/deprecatedParam. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aeb2ecc0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aeb2ecc0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aeb2ecc0 Branch: refs/heads/master Commit: aeb2ecc0cd898f5352df0a04be1014b02ea3e20e Parents: 58518d0 Author: Xianyang Liu Authored: Mon May 8 10:25:24 2017 -0700 Committer: Marcelo Vanzin Committed: Mon May 8 10:25:24 2017 -0700 -- conf/spark-env.sh.template | 1 - .../org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala | 5 + 2 files changed, 1 insertion(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/aeb2ecc0/conf/spark-env.sh.template -- diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template index 94bd2c4..b7c985a 100755 --- a/conf/spark-env.sh.template +++ b/conf/spark-env.sh.template @@ -34,7 +34,6 @@ # Options read in YARN client mode # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files -# - SPARK_EXECUTOR_INSTANCES, Number of executors to start (Default: 2) # - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1). # - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G) # - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G) http://git-wip-us.apache.org/repos/asf/spark/blob/aeb2ecc0/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala -- diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala index 9357885..0fc994d 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -280,10 +280,7 @@ object YarnSparkHadoopUtil { initialNumExecutors } else { - val targetNumExecutors = - sys.env.get("SPARK_EXECUTOR_INSTANCES").map(_.toInt).getOrElse(numExecutors) - // System property can override environment variable. - conf.get(EXECUTOR_INSTANCES).getOrElse(targetNumExecutors) + conf.get(EXECUTOR_INSTANCES).getOrElse(numExecutors) } } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
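For illustration, a minimal sketch (not from the patch) of the supported way to set the executor count once SPARK_EXECUTOR_INSTANCES is gone from spark-env.sh: use spark.executor.instances in spark-defaults.conf, pass it with `--conf` on spark-submit, or set it programmatically as below. The object and app names are invented for the example.

```scala
import org.apache.spark.SparkConf

object ExecutorInstancesConfExample {
  def main(args: Array[String]): Unit = {
    // SPARK_EXECUTOR_INSTANCES in spark-env.sh is no longer consulted; the executor count
    // comes from spark.executor.instances instead.
    val conf = new SparkConf()
      .setAppName("executor-instances-example")
      .set("spark.executor.instances", "4")

    println(conf.get("spark.executor.instances"))
  }
}
```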
spark git commit: [SPARK-20596][ML][TEST] Consolidate and improve ALS recommendAll test cases
Repository: spark Updated Branches: refs/heads/master 15526653a -> 58518d070 [SPARK-20596][ML][TEST] Consolidate and improve ALS recommendAll test cases Existing test cases for `recommendForAllX` methods (added in [SPARK-19535](https://issues.apache.org/jira/browse/SPARK-19535)) test `k < num items` and `k = num items`. Technically we should also test that `k > num items` returns the same results as `k = num items`. ## How was this patch tested? Updated existing unit tests. Author: Nick PentreathCloses #17860 from MLnick/SPARK-20596-als-rec-tests. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/58518d07 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/58518d07 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/58518d07 Branch: refs/heads/master Commit: 58518d070777fc0665c4d02bad8adf910807df98 Parents: 1552665 Author: Nick Pentreath Authored: Mon May 8 12:45:00 2017 +0200 Committer: Nick Pentreath Committed: Mon May 8 12:45:00 2017 +0200 -- .../spark/ml/recommendation/ALSSuite.scala | 63 1 file changed, 25 insertions(+), 38 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/58518d07/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala index 7574af3..9d31e79 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala @@ -671,58 +671,45 @@ class ALSSuite .setItemCol("item") } - test("recommendForAllUsers with k < num_items") { -val topItems = getALSModel.recommendForAllUsers(2) -assert(topItems.count() == 3) -assert(topItems.columns.contains("user")) - -val expected = Map( - 0 -> Array((3, 54f), (4, 44f)), - 1 -> Array((3, 39f), (5, 33f)), - 2 -> Array((3, 51f), (5, 45f)) -) -checkRecommendations(topItems, expected, "item") - } - - test("recommendForAllUsers with k = num_items") { -val topItems = getALSModel.recommendForAllUsers(4) -assert(topItems.count() == 3) -assert(topItems.columns.contains("user")) - + test("recommendForAllUsers with k <, = and > num_items") { +val model = getALSModel +val numUsers = model.userFactors.count +val numItems = model.itemFactors.count val expected = Map( 0 -> Array((3, 54f), (4, 44f), (5, 42f), (6, 28f)), 1 -> Array((3, 39f), (5, 33f), (4, 26f), (6, 16f)), 2 -> Array((3, 51f), (5, 45f), (4, 30f), (6, 18f)) ) -checkRecommendations(topItems, expected, "item") - } - test("recommendForAllItems with k < num_users") { -val topUsers = getALSModel.recommendForAllItems(2) -assert(topUsers.count() == 4) -assert(topUsers.columns.contains("item")) - -val expected = Map( - 3 -> Array((0, 54f), (2, 51f)), - 4 -> Array((0, 44f), (2, 30f)), - 5 -> Array((2, 45f), (0, 42f)), - 6 -> Array((0, 28f), (2, 18f)) -) -checkRecommendations(topUsers, expected, "user") +Seq(2, 4, 6).foreach { k => + val n = math.min(k, numItems).toInt + val expectedUpToN = expected.mapValues(_.slice(0, n)) + val topItems = model.recommendForAllUsers(k) + assert(topItems.count() == numUsers) + assert(topItems.columns.contains("user")) + checkRecommendations(topItems, expectedUpToN, "item") +} } - test("recommendForAllItems with k = num_users") { -val topUsers = getALSModel.recommendForAllItems(3) -assert(topUsers.count() == 4) -assert(topUsers.columns.contains("item")) - + test("recommendForAllItems with k <, = and > num_users") { 
+val model = getALSModel +val numUsers = model.userFactors.count +val numItems = model.itemFactors.count val expected = Map( 3 -> Array((0, 54f), (2, 51f), (1, 39f)), 4 -> Array((0, 44f), (2, 30f), (1, 26f)), 5 -> Array((2, 45f), (0, 42f), (1, 33f)), 6 -> Array((0, 28f), (2, 18f), (1, 16f)) ) -checkRecommendations(topUsers, expected, "user") + +Seq(2, 3, 4).foreach { k => + val n = math.min(k, numUsers).toInt + val expectedUpToN = expected.mapValues(_.slice(0, n)) + val topUsers = getALSModel.recommendForAllItems(k) + assert(topUsers.count() == numItems) + assert(topUsers.columns.contains("item")) + checkRecommendations(topUsers, expectedUpToN, "user") +} } private def checkRecommendations( - To unsubscribe, e-mail:
spark git commit: [SPARK-20596][ML][TEST] Consolidate and improve ALS recommendAll test cases
Repository: spark Updated Branches: refs/heads/branch-2.2 d8a5a0d34 -> 7b9d05ad0 [SPARK-20596][ML][TEST] Consolidate and improve ALS recommendAll test cases Existing test cases for `recommendForAllX` methods (added in [SPARK-19535](https://issues.apache.org/jira/browse/SPARK-19535)) test `k < num items` and `k = num items`. Technically we should also test that `k > num items` returns the same results as `k = num items`. ## How was this patch tested? Updated existing unit tests. Author: Nick PentreathCloses #17860 from MLnick/SPARK-20596-als-rec-tests. (cherry picked from commit 58518d070777fc0665c4d02bad8adf910807df98) Signed-off-by: Nick Pentreath Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7b9d05ad Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7b9d05ad Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7b9d05ad Branch: refs/heads/branch-2.2 Commit: 7b9d05ad00455daa53ae4ef1a602a6c64c2c95a4 Parents: d8a5a0d Author: Nick Pentreath Authored: Mon May 8 12:45:00 2017 +0200 Committer: Nick Pentreath Committed: Mon May 8 12:45:17 2017 +0200 -- .../spark/ml/recommendation/ALSSuite.scala | 63 1 file changed, 25 insertions(+), 38 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7b9d05ad/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala index 7574af3..9d31e79 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala @@ -671,58 +671,45 @@ class ALSSuite .setItemCol("item") } - test("recommendForAllUsers with k < num_items") { -val topItems = getALSModel.recommendForAllUsers(2) -assert(topItems.count() == 3) -assert(topItems.columns.contains("user")) - -val expected = Map( - 0 -> Array((3, 54f), (4, 44f)), - 1 -> Array((3, 39f), (5, 33f)), - 2 -> Array((3, 51f), (5, 45f)) -) -checkRecommendations(topItems, expected, "item") - } - - test("recommendForAllUsers with k = num_items") { -val topItems = getALSModel.recommendForAllUsers(4) -assert(topItems.count() == 3) -assert(topItems.columns.contains("user")) - + test("recommendForAllUsers with k <, = and > num_items") { +val model = getALSModel +val numUsers = model.userFactors.count +val numItems = model.itemFactors.count val expected = Map( 0 -> Array((3, 54f), (4, 44f), (5, 42f), (6, 28f)), 1 -> Array((3, 39f), (5, 33f), (4, 26f), (6, 16f)), 2 -> Array((3, 51f), (5, 45f), (4, 30f), (6, 18f)) ) -checkRecommendations(topItems, expected, "item") - } - test("recommendForAllItems with k < num_users") { -val topUsers = getALSModel.recommendForAllItems(2) -assert(topUsers.count() == 4) -assert(topUsers.columns.contains("item")) - -val expected = Map( - 3 -> Array((0, 54f), (2, 51f)), - 4 -> Array((0, 44f), (2, 30f)), - 5 -> Array((2, 45f), (0, 42f)), - 6 -> Array((0, 28f), (2, 18f)) -) -checkRecommendations(topUsers, expected, "user") +Seq(2, 4, 6).foreach { k => + val n = math.min(k, numItems).toInt + val expectedUpToN = expected.mapValues(_.slice(0, n)) + val topItems = model.recommendForAllUsers(k) + assert(topItems.count() == numUsers) + assert(topItems.columns.contains("user")) + checkRecommendations(topItems, expectedUpToN, "item") +} } - test("recommendForAllItems with k = num_users") { -val topUsers = getALSModel.recommendForAllItems(3) -assert(topUsers.count() == 4) 
-assert(topUsers.columns.contains("item")) - + test("recommendForAllItems with k <, = and > num_users") { +val model = getALSModel +val numUsers = model.userFactors.count +val numItems = model.itemFactors.count val expected = Map( 3 -> Array((0, 54f), (2, 51f), (1, 39f)), 4 -> Array((0, 44f), (2, 30f), (1, 26f)), 5 -> Array((2, 45f), (0, 42f), (1, 33f)), 6 -> Array((0, 28f), (2, 18f), (1, 16f)) ) -checkRecommendations(topUsers, expected, "user") + +Seq(2, 3, 4).foreach { k => + val n = math.min(k, numUsers).toInt + val expectedUpToN = expected.mapValues(_.slice(0, n)) + val topUsers = getALSModel.recommendForAllItems(k) + assert(topUsers.count() == numItems) + assert(topUsers.columns.contains("item")) + checkRecommendations(topUsers, expectedUpToN, "user") +} } private def
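A self-contained sketch (not from the patch) of the behaviour the updated tests above now cover: asking recommendForAllUsers for more recommendations than there are items should behave exactly like asking for all items. The toy ratings, object name, and app name below are invented for the example.

```scala
import org.apache.spark.ml.recommendation.ALS
import org.apache.spark.sql.SparkSession

object RecommendAllWithLargeK {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("als-recommend-all").getOrCreate()
    import spark.implicits._

    // Toy ratings: 3 users, 4 items.
    val ratings = Seq(
      (0, 3, 54f), (0, 4, 44f), (0, 5, 42f), (0, 6, 28f),
      (1, 3, 39f), (1, 5, 33f), (1, 4, 26f), (1, 6, 16f),
      (2, 3, 51f), (2, 5, 45f), (2, 4, 30f), (2, 6, 18f)
    ).toDF("user", "item", "rating")

    val model = new ALS()
      .setUserCol("user")
      .setItemCol("item")
      .setRatingCol("rating")
      .fit(ratings)

    val numItems = model.itemFactors.count().toInt

    // k larger than the number of items should simply cap at numItems:
    // each user still gets at most numItems recommendations.
    val topItems = model.recommendForAllUsers(numItems + 10)
    topItems.show(truncate = false)

    spark.stop()
  }
}
```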
spark git commit: [SPARK-19956][CORE] Optimize a location order of blocks with topology information
Repository: spark Updated Branches: refs/heads/master 0f820e2b6 -> 15526653a [SPARK-19956][CORE] Optimize a location order of blocks with topology information ## What changes were proposed in this pull request? When call the method getLocations of BlockManager, we only compare the data block host. Random selection for non-local data blocks, this may cause the selected data block to be in a different rack. So in this patch to increase the sort of the rack. ## How was this patch tested? New test case. Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Xianyang LiuCloses #17300 from ConeyLiu/blockmanager. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/15526653 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/15526653 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/15526653 Branch: refs/heads/master Commit: 15526653a93a32cde3c9ea0c0e68e35622b0a590 Parents: 0f820e2 Author: Xianyang Liu Authored: Mon May 8 17:33:47 2017 +0800 Committer: Wenchen Fan Committed: Mon May 8 17:33:47 2017 +0800 -- .../org/apache/spark/storage/BlockManager.scala | 11 +-- .../spark/storage/BlockManagerSuite.scala | 31 ++-- 2 files changed, 37 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/15526653/core/src/main/scala/org/apache/spark/storage/BlockManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 3219969..33ce30c 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -612,12 +612,19 @@ private[spark] class BlockManager( /** * Return a list of locations for the given block, prioritizing the local machine since - * multiple block managers can share the same host. + * multiple block managers can share the same host, followed by hosts on the same rack. 
*/ private def getLocations(blockId: BlockId): Seq[BlockManagerId] = { val locs = Random.shuffle(master.getLocations(blockId)) val (preferredLocs, otherLocs) = locs.partition { loc => blockManagerId.host == loc.host } -preferredLocs ++ otherLocs +blockManagerId.topologyInfo match { + case None => preferredLocs ++ otherLocs + case Some(_) => +val (sameRackLocs, differentRackLocs) = otherLocs.partition { + loc => blockManagerId.topologyInfo == loc.topologyInfo +} +preferredLocs ++ sameRackLocs ++ differentRackLocs +} } /** http://git-wip-us.apache.org/repos/asf/spark/blob/15526653/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index a8b9604..1e7bcdb 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -496,8 +496,8 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE assert(list2DiskGet.get.readMethod === DataReadMethod.Disk) } - test("optimize a location order of blocks") { -val localHost = Utils.localHostName() + test("optimize a location order of blocks without topology information") { +val localHost = "localhost" val otherHost = "otherHost" val bmMaster = mock(classOf[BlockManagerMaster]) val bmId1 = BlockManagerId("id1", localHost, 1) @@ -508,7 +508,32 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE val blockManager = makeBlockManager(128, "exec", bmMaster) val getLocations = PrivateMethod[Seq[BlockManagerId]]('getLocations) val locations = blockManager invokePrivate getLocations(BroadcastBlockId(0)) -assert(locations.map(_.host).toSet === Set(localHost, localHost, otherHost)) +assert(locations.map(_.host) === Seq(localHost, localHost, otherHost)) + } + + test("optimize a location order of blocks with topology information") { +val localHost = "localhost" +val otherHost = "otherHost" +val localRack = "localRack" +val otherRack = "otherRack" + +val bmMaster = mock(classOf[BlockManagerMaster]) +val bmId1 = BlockManagerId("id1", localHost, 1, Some(localRack)) +val bmId2 = BlockManagerId("id2", localHost, 2, Some(localRack)) +val bmId3 = BlockManagerId("id3", otherHost, 3, Some(otherRack)) +
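A standalone sketch (not from the patch) of the ordering idea added to BlockManager.getLocations in the commit above, with `Loc` standing in for BlockManagerId (a host plus optional topology/rack info): shuffle the candidates, then prefer same-host locations, then same-rack locations, then everything else. The names are invented for the example.

```scala
import scala.util.Random

object BlockLocationOrdering {
  // Stand-in for BlockManagerId: a host plus optional topology (rack) info.
  final case class Loc(host: String, rack: Option[String])

  def orderLocations(candidates: Seq[Loc], self: Loc): Seq[Loc] = {
    val shuffled = Random.shuffle(candidates)
    // First prefer blocks on the same host, as before.
    val (sameHost, others) = shuffled.partition(_.host == self.host)
    self.rack match {
      case None => sameHost ++ others
      case Some(_) =>
        // With topology info available, additionally prefer blocks in the same rack.
        val (sameRack, differentRack) = others.partition(_.rack == self.rack)
        sameHost ++ sameRack ++ differentRack
    }
  }

  def main(args: Array[String]): Unit = {
    val self = Loc("host-a", Some("rack-1"))
    val candidates = Seq(
      Loc("host-c", Some("rack-2")),
      Loc("host-b", Some("rack-1")),
      Loc("host-a", Some("rack-1")))
    // Prints hosts ordered local host first, then local rack, then the rest.
    println(orderLocations(candidates, self).map(_.host).mkString(", "))
  }
}
```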
spark git commit: [SPARK-20519][SQL][CORE] Modify to prevent some possible runtime exceptions
Repository: spark Updated Branches: refs/heads/master 2fdaeb52b -> 0f820e2b6 [SPARK-20519][SQL][CORE] Modify to prevent some possible runtime exceptions Signed-off-by: liuxian ## What changes were proposed in this pull request? When the input parameter is null, may be a runtime exception occurs ## How was this patch tested? Existing unit tests Author: liuxianCloses #17796 from 10110346/wip_lx_0428. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0f820e2b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0f820e2b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0f820e2b Branch: refs/heads/master Commit: 0f820e2b6c507dc4156703862ce65e598ca41cca Parents: 2fdaeb5 Author: liuxian Authored: Mon May 8 10:00:58 2017 +0100 Committer: Sean Owen Committed: Mon May 8 10:00:58 2017 +0100 -- .../main/scala/org/apache/spark/api/python/PythonRDD.scala | 2 +- .../main/scala/org/apache/spark/deploy/DeployMessage.scala | 8 .../main/scala/org/apache/spark/deploy/master/Master.scala | 2 +- .../org/apache/spark/deploy/master/MasterArguments.scala| 4 ++-- .../scala/org/apache/spark/deploy/master/WorkerInfo.scala | 2 +- .../main/scala/org/apache/spark/deploy/worker/Worker.scala | 2 +- .../org/apache/spark/deploy/worker/WorkerArguments.scala| 4 ++-- .../src/main/scala/org/apache/spark/executor/Executor.scala | 2 +- .../scala/org/apache/spark/storage/BlockManagerId.scala | 2 +- core/src/main/scala/org/apache/spark/util/RpcUtils.scala| 2 +- core/src/main/scala/org/apache/spark/util/Utils.scala | 9 + .../deploy/mesos/MesosClusterDispatcherArguments.scala | 2 +- 12 files changed, 21 insertions(+), 20 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0f820e2b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index b0dd2fc..fb0405b 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -879,7 +879,7 @@ private[spark] class PythonAccumulatorV2( private val serverPort: Int) extends CollectionAccumulator[Array[Byte]] { - Utils.checkHost(serverHost, "Expected hostname") + Utils.checkHost(serverHost) val bufferSize = SparkEnv.get.conf.getInt("spark.buffer.size", 65536) http://git-wip-us.apache.org/repos/asf/spark/blob/0f820e2b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala index ac09c6c..b5cb3f0 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -43,7 +43,7 @@ private[deploy] object DeployMessages { memory: Int, workerWebUiUrl: String) extends DeployMessage { -Utils.checkHost(host, "Required hostname") +Utils.checkHost(host) assert (port > 0) } @@ -131,7 +131,7 @@ private[deploy] object DeployMessages { // TODO(matei): replace hostPort with host case class ExecutorAdded(id: Int, workerId: String, hostPort: String, cores: Int, memory: Int) { -Utils.checkHostPort(hostPort, "Required hostport") +Utils.checkHostPort(hostPort) } case class ExecutorUpdated(id: Int, state: ExecutorState, message: Option[String], @@ -183,7 +183,7 @@ private[deploy] object DeployMessages { completedDrivers: Array[DriverInfo], status: 
MasterState) { -Utils.checkHost(host, "Required hostname") +Utils.checkHost(host) assert (port > 0) def uri: String = "spark://" + host + ":" + port @@ -201,7 +201,7 @@ private[deploy] object DeployMessages { drivers: List[DriverRunner], finishedDrivers: List[DriverRunner], masterUrl: String, cores: Int, memory: Int, coresUsed: Int, memoryUsed: Int, masterWebUiUrl: String) { -Utils.checkHost(host, "Required hostname") +Utils.checkHost(host) assert (port > 0) } http://git-wip-us.apache.org/repos/asf/spark/blob/0f820e2b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 816bf37..e061939 100644 ---
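A minimal sketch (not the actual Utils implementation, whose exact message may differ) of the kind of null-safe precondition the commit above moves toward: fail fast with an informative assertion instead of risking a NullPointerException, or an assertion with an empty message, later on. The object name and hostnames are invented for the example.

```scala
object HostCheckSketch {
  // Reject null and host:port inputs up front, with a message that names the bad value.
  def checkHost(host: String): Unit = {
    assert(host != null && !host.contains(":"), s"Expected hostname (not host:port) but got $host")
  }

  def main(args: Array[String]): Unit = {
    checkHost("worker-1.example.com")          // ok
    // checkHost("worker-1.example.com:7077")  // would fail the assertion
    // checkHost(null)                         // would also fail, with a clear message
  }
}
```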
spark git commit: [SPARKR][DOC] fix typo in vignettes
Repository: spark Updated Branches: refs/heads/master 42cc6d13e -> 2fdaeb52b [SPARKR][DOC] fix typo in vignettes ## What changes were proposed in this pull request? Fix typo in vignettes Author: Wayne ZhangCloses #17884 from actuaryzhang/typo. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2fdaeb52 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2fdaeb52 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2fdaeb52 Branch: refs/heads/master Commit: 2fdaeb52bbe2ed1a9127ac72917286e505303c85 Parents: 42cc6d1 Author: Wayne Zhang Authored: Sun May 7 23:16:30 2017 -0700 Committer: Felix Cheung Committed: Sun May 7 23:16:30 2017 -0700 -- R/pkg/vignettes/sparkr-vignettes.Rmd | 36 +++ 1 file changed, 18 insertions(+), 18 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2fdaeb52/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index d38ec4f..49f4ab8 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -65,7 +65,7 @@ We can view the first few rows of the `SparkDataFrame` by `head` or `showDF` fun head(carsDF) ``` -Common data processing operations such as `filter`, `select` are supported on the `SparkDataFrame`. +Common data processing operations such as `filter` and `select` are supported on the `SparkDataFrame`. ```{r} carsSubDF <- select(carsDF, "model", "mpg", "hp") carsSubDF <- filter(carsSubDF, carsSubDF$hp >= 200) @@ -379,7 +379,7 @@ out <- dapply(carsSubDF, function(x) { x <- cbind(x, x$mpg * 1.61) }, schema) head(collect(out)) ``` -Like `dapply`, apply a function to each partition of a `SparkDataFrame` and collect the result back. The output of function should be a `data.frame`, but no schema is required in this case. Note that `dapplyCollect` can fail if the output of UDF run on all the partition cannot be pulled to the driver and fit in driver memory. +Like `dapply`, `dapplyCollect` can apply a function to each partition of a `SparkDataFrame` and collect the result back. The output of the function should be a `data.frame`, but no schema is required in this case. Note that `dapplyCollect` can fail if the output of the UDF on all partitions cannot be pulled into the driver's memory. ```{r} out <- dapplyCollect( @@ -405,7 +405,7 @@ result <- gapply( head(arrange(result, "max_mpg", decreasing = TRUE)) ``` -Like gapply, `gapplyCollect` applies a function to each partition of a `SparkDataFrame` and collect the result back to R `data.frame`. The output of the function should be a `data.frame` but no schema is required in this case. Note that `gapplyCollect` can fail if the output of UDF run on all the partition cannot be pulled to the driver and fit in driver memory. +Like `gapply`, `gapplyCollect` can apply a function to each partition of a `SparkDataFrame` and collect the result back to R `data.frame`. The output of the function should be a `data.frame` but no schema is required in this case. Note that `gapplyCollect` can fail if the output of the UDF on all partitions cannot be pulled into the driver's memory. ```{r} result <- gapplyCollect( @@ -458,20 +458,20 @@ options(ops) ### SQL Queries -A `SparkDataFrame` can also be registered as a temporary view in Spark SQL and that allows you to run SQL queries over its data. The sql function enables applications to run SQL queries programmatically and returns the result as a `SparkDataFrame`. 
+A `SparkDataFrame` can also be registered as a temporary view in Spark SQL so that one can run SQL queries over its data. The sql function enables applications to run SQL queries programmatically and returns the result as a `SparkDataFrame`. ```{r} people <- read.df(paste0(sparkR.conf("spark.home"), "/examples/src/main/resources/people.json"), "json") ``` -Register this SparkDataFrame as a temporary view. +Register this `SparkDataFrame` as a temporary view. ```{r} createOrReplaceTempView(people, "people") ``` -SQL statements can be run by using the sql method. +SQL statements can be run using the sql method. ```{r} teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19") head(teenagers) @@ -780,7 +780,7 @@ head(predict(isoregModel, newDF)) `spark.gbt` fits a [gradient-boosted tree](https://en.wikipedia.org/wiki/Gradient_boosting) classification or regression model on a `SparkDataFrame`. Users can call `summary` to get a summary of the fitted model, `predict` to make predictions, and `write.ml`/`read.ml` to save/load fitted
spark git commit: [SPARKR][DOC] fix typo in vignettes
Repository: spark Updated Branches: refs/heads/branch-2.2 6c5b7e106 -> d8a5a0d34 [SPARKR][DOC] fix typo in vignettes ## What changes were proposed in this pull request? Fix typo in vignettes Author: Wayne ZhangCloses #17884 from actuaryzhang/typo. (cherry picked from commit 2fdaeb52bbe2ed1a9127ac72917286e505303c85) Signed-off-by: Felix Cheung Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d8a5a0d3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d8a5a0d3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d8a5a0d3 Branch: refs/heads/branch-2.2 Commit: d8a5a0d3420abbb911d8a80dc7165762eb08d779 Parents: 6c5b7e1 Author: Wayne Zhang Authored: Sun May 7 23:16:30 2017 -0700 Committer: Felix Cheung Committed: Sun May 7 23:16:44 2017 -0700 -- R/pkg/vignettes/sparkr-vignettes.Rmd | 36 +++ 1 file changed, 18 insertions(+), 18 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d8a5a0d3/R/pkg/vignettes/sparkr-vignettes.Rmd -- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index b933c59..0f6d5c2 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -65,7 +65,7 @@ We can view the first few rows of the `SparkDataFrame` by `head` or `showDF` fun head(carsDF) ``` -Common data processing operations such as `filter`, `select` are supported on the `SparkDataFrame`. +Common data processing operations such as `filter` and `select` are supported on the `SparkDataFrame`. ```{r} carsSubDF <- select(carsDF, "model", "mpg", "hp") carsSubDF <- filter(carsSubDF, carsSubDF$hp >= 200) @@ -364,7 +364,7 @@ out <- dapply(carsSubDF, function(x) { x <- cbind(x, x$mpg * 1.61) }, schema) head(collect(out)) ``` -Like `dapply`, apply a function to each partition of a `SparkDataFrame` and collect the result back. The output of function should be a `data.frame`, but no schema is required in this case. Note that `dapplyCollect` can fail if the output of UDF run on all the partition cannot be pulled to the driver and fit in driver memory. +Like `dapply`, `dapplyCollect` can apply a function to each partition of a `SparkDataFrame` and collect the result back. The output of the function should be a `data.frame`, but no schema is required in this case. Note that `dapplyCollect` can fail if the output of the UDF on all partitions cannot be pulled into the driver's memory. ```{r} out <- dapplyCollect( @@ -390,7 +390,7 @@ result <- gapply( head(arrange(result, "max_mpg", decreasing = TRUE)) ``` -Like gapply, `gapplyCollect` applies a function to each partition of a `SparkDataFrame` and collect the result back to R `data.frame`. The output of the function should be a `data.frame` but no schema is required in this case. Note that `gapplyCollect` can fail if the output of UDF run on all the partition cannot be pulled to the driver and fit in driver memory. +Like `gapply`, `gapplyCollect` can apply a function to each partition of a `SparkDataFrame` and collect the result back to R `data.frame`. The output of the function should be a `data.frame` but no schema is required in this case. Note that `gapplyCollect` can fail if the output of the UDF on all partitions cannot be pulled into the driver's memory. ```{r} result <- gapplyCollect( @@ -443,20 +443,20 @@ options(ops) ### SQL Queries -A `SparkDataFrame` can also be registered as a temporary view in Spark SQL and that allows you to run SQL queries over its data. 
The sql function enables applications to run SQL queries programmatically and returns the result as a `SparkDataFrame`. +A `SparkDataFrame` can also be registered as a temporary view in Spark SQL so that one can run SQL queries over its data. The sql function enables applications to run SQL queries programmatically and returns the result as a `SparkDataFrame`. ```{r} people <- read.df(paste0(sparkR.conf("spark.home"), "/examples/src/main/resources/people.json"), "json") ``` -Register this SparkDataFrame as a temporary view. +Register this `SparkDataFrame` as a temporary view. ```{r} createOrReplaceTempView(people, "people") ``` -SQL statements can be run by using the sql method. +SQL statements can be run using the sql method. ```{r} teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19") head(teenagers) @@ -765,7 +765,7 @@ head(predict(isoregModel, newDF)) `spark.gbt` fits a [gradient-boosted tree](https://en.wikipedia.org/wiki/Gradient_boosting) classification or regression model on a `SparkDataFrame`. Users can
spark git commit: [SPARK-20380][SQL] Unable to set/unset table comment property using ALTER TABLE SET/UNSET TBLPROPERTIES ddl
Repository: spark Updated Branches: refs/heads/master c24bdaab5 -> 42cc6d13e [SPARK-20380][SQL] Unable to set/unset table comment property using ALTER TABLE SET/UNSET TBLPROPERTIES ddl ### What changes were proposed in this pull request? Table comment was not getting set/unset using **ALTER TABLE SET/UNSET TBLPROPERTIES** query eg: ALTER TABLE table_with_comment SET TBLPROPERTIES("comment"= "modified comment) when user alter the table properties and adds/updates table comment,table comment which is a field of **CatalogTable** instance is not getting updated and old table comment if exists was shown to user, inorder to handle this issue, update the comment field value in **CatalogTable** with the newly added/modified comment along with other table level properties when user executes **ALTER TABLE SET TBLPROPERTIES** query. This pr has also taken care of unsetting the table comment when user executes query **ALTER TABLE UNSET TBLPROPERTIES** inorder to unset or remove table comment. eg: ALTER TABLE table_comment UNSET TBLPROPERTIES IF EXISTS ('comment') ### How was this patch tested? Added test cases as part of **SQLQueryTestSuite** for verifying table comment using desc formatted table query after adding/modifying table comment as part of **AlterTableSetPropertiesCommand** and unsetting the table comment using **AlterTableUnsetPropertiesCommand**. Author: sujith71955Closes #17649 from sujith71955/alter_table_comment. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/42cc6d13 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/42cc6d13 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/42cc6d13 Branch: refs/heads/master Commit: 42cc6d13edbebb7c435ec47c0c12b445e05fdd49 Parents: c24bdaa Author: sujith71955 Authored: Sun May 7 23:15:00 2017 -0700 Committer: Xiao Li Committed: Sun May 7 23:15:00 2017 -0700 -- .../sql/catalyst/catalog/InMemoryCatalog.scala | 8 +- .../spark/sql/execution/command/ddl.scala | 12 +- .../inputs/describe-table-after-alter-table.sql | 29 .../describe-table-after-alter-table.sql.out| 161 +++ 4 files changed, 204 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/42cc6d13/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala index 81dd8ef..8a5319b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala @@ -216,8 +216,8 @@ class InMemoryCatalog( } else { tableDefinition } - - catalog(db).tables.put(table, new TableDesc(tableWithLocation)) + val tableProp = tableWithLocation.properties.filter(_._1 != "comment") + catalog(db).tables.put(table, new TableDesc(tableWithLocation.copy(properties = tableProp))) } } @@ -298,7 +298,9 @@ class InMemoryCatalog( assert(tableDefinition.identifier.database.isDefined) val db = tableDefinition.identifier.database.get requireTableExists(db, tableDefinition.identifier.table) -catalog(db).tables(tableDefinition.identifier.table).table = tableDefinition +val updatedProperties = tableDefinition.properties.filter(kv => kv._1 != "comment") +val newTableDefinition = tableDefinition.copy(properties = updatedProperties) +catalog(db).tables(tableDefinition.identifier.table).table = 
newTableDefinition } override def alterTableSchema( http://git-wip-us.apache.org/repos/asf/spark/blob/42cc6d13/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 5554056..793fb9b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -231,8 +231,12 @@ case class AlterTableSetPropertiesCommand( val catalog = sparkSession.sessionState.catalog val table = catalog.getTableMetadata(tableName) DDLUtils.verifyAlterTableType(catalog, table, isView) -// This overrides old properties -val newTable = table.copy(properties = table.properties ++ properties) +// This overrides old
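For illustration, a minimal end-to-end sketch (not from the patch) of the behaviour this change fixes, driven through SparkSession.sql against a local session; the table name, object name, and app name are invented for the example. After the fix, the modified comment shows up in DESC FORMATTED, and unsetting removes it.

```scala
import org.apache.spark.sql.SparkSession

object AlterTableCommentExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("alter-table-comment")
      .getOrCreate()

    spark.sql("CREATE TABLE IF NOT EXISTS table_with_comment (id INT) USING parquet")

    // Setting the 'comment' table property should now update the table comment.
    spark.sql("ALTER TABLE table_with_comment SET TBLPROPERTIES ('comment' = 'modified comment')")
    spark.sql("DESC FORMATTED table_with_comment").show(100, truncate = false)

    // Unsetting the property should remove the comment entirely.
    spark.sql("ALTER TABLE table_with_comment UNSET TBLPROPERTIES IF EXISTS ('comment')")
    spark.sql("DESC FORMATTED table_with_comment").show(100, truncate = false)

    spark.stop()
  }
}
```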
spark git commit: [SPARK-20626][SPARKR] address date test warning with timezone on windows
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 048e9890c -> 6c5b7e106

[SPARK-20626][SPARKR] address date test warning with timezone on windows

## What changes were proposed in this pull request?

set timezone on windows

## How was this patch tested?

unit test, AppVeyor

Author: Felix Cheung

Closes #17892 from felixcheung/rtimestamptest.

(cherry picked from commit c24bdaab5a234d18b273544cefc44cc4005bf8fc)
Signed-off-by: Felix Cheung

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6c5b7e10
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6c5b7e10
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6c5b7e10

Branch: refs/heads/branch-2.2
Commit: 6c5b7e106895302a87cf6522d3c64c3badac699f
Parents: 048e989
Author: Felix Cheung
Authored: Sun May 7 23:10:18 2017 -0700
Committer: Felix Cheung
Committed: Sun May 7 23:10:42 2017 -0700

--
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 4 ++++
 1 file changed, 4 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/6c5b7e10/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 3c985f2..3f445e2 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -96,6 +96,10 @@ mockLinesMapType <- c("{\"name\":\"Bob\",\"info\":{\"age\":16,\"height\":176.5}}
 mapTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
 writeLines(mockLinesMapType, mapTypeJsonPath)
 
+if (.Platform$OS.type == "windows") {
+  Sys.setenv(TZ = "GMT")
+}
+
 test_that("calling sparkRSQL.init returns existing SQL context", {
   skip_on_cran()
spark git commit: [SPARK-20626][SPARKR] address date test warning with timezone on windows
Repository: spark
Updated Branches:
  refs/heads/master 22691556e -> c24bdaab5

[SPARK-20626][SPARKR] address date test warning with timezone on windows

## What changes were proposed in this pull request?

set timezone on windows

## How was this patch tested?

unit test, AppVeyor

Author: Felix Cheung

Closes #17892 from felixcheung/rtimestamptest.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c24bdaab
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c24bdaab
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c24bdaab

Branch: refs/heads/master
Commit: c24bdaab5a234d18b273544cefc44cc4005bf8fc
Parents: 2269155
Author: Felix Cheung
Authored: Sun May 7 23:10:18 2017 -0700
Committer: Felix Cheung
Committed: Sun May 7 23:10:18 2017 -0700

--
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 4 ++++
 1 file changed, 4 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/c24bdaab/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 0856bab..f517ce6 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -96,6 +96,10 @@ mockLinesMapType <- c("{\"name\":\"Bob\",\"info\":{\"age\":16,\"height\":176.5}}
 mapTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
 writeLines(mockLinesMapType, mapTypeJsonPath)
 
+if (.Platform$OS.type == "windows") {
+  Sys.setenv(TZ = "GMT")
+}
+
 test_that("calling sparkRSQL.init returns existing SQL context", {
   skip_on_cran()