git commit: Fixing typo in als.py
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 6f701ff55 -> 98944a973


Fixing typo in als.py

XtY should be Xty.

Author: Evan Sparks <evan.spa...@gmail.com>

Closes #696 from etrain/patch-2 and squashes the following commits:

634cb8d [Evan Sparks] Fixing typo in als.py


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/98944a97
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/98944a97
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/98944a97

Branch: refs/heads/branch-1.0
Commit: 98944a9734389cd4400516a1eb3afa5376f44927
Parents: 6f701ff
Author: Evan Sparks <evan.spa...@gmail.com>
Authored: Thu May 8 13:07:30 2014 -0700
Committer: Shivaram Venkataraman <shiva...@eecs.berkeley.edu>
Committed: Thu May 8 16:49:33 2014 -0700

----------------------------------------------------------------------
 examples/src/main/python/als.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/98944a97/examples/src/main/python/als.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/als.py b/examples/src/main/python/als.py
index 33700ab..01552dc 100755
--- a/examples/src/main/python/als.py
+++ b/examples/src/main/python/als.py
@@ -38,7 +38,7 @@ def update(i, vec, mat, ratings):
     ff = mat.shape[1]

     XtX = mat.T * mat
-    XtY = mat.T * ratings[i, :].T
+    Xty = mat.T * ratings[i, :].T

     for j in range(ff):
         XtX[j,j] += LAMBDA * uu
git commit: Fixing typo in als.py
Repository: spark
Updated Branches:
  refs/heads/master c3f8b78c2 -> 5c5e7d580


Fixing typo in als.py

XtY should be Xty.

Author: Evan Sparks <evan.spa...@gmail.com>

Closes #696 from etrain/patch-2 and squashes the following commits:

634cb8d [Evan Sparks] Fixing typo in als.py


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5c5e7d58
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5c5e7d58
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5c5e7d58

Branch: refs/heads/master
Commit: 5c5e7d5809d337ce41a7a90eb9201e12803aba48
Parents: c3f8b78
Author: Evan Sparks <evan.spa...@gmail.com>
Authored: Thu May 8 13:07:30 2014 -0700
Committer: Shivaram Venkataraman <shiva...@eecs.berkeley.edu>
Committed: Thu May 8 13:07:30 2014 -0700

----------------------------------------------------------------------
 examples/src/main/python/als.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5c5e7d58/examples/src/main/python/als.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/als.py b/examples/src/main/python/als.py
index 33700ab..01552dc 100755
--- a/examples/src/main/python/als.py
+++ b/examples/src/main/python/als.py
@@ -38,7 +38,7 @@ def update(i, vec, mat, ratings):
     ff = mat.shape[1]

     XtX = mat.T * mat
-    XtY = mat.T * ratings[i, :].T
+    Xty = mat.T * ratings[i, :].T

     for j in range(ff):
         XtX[j,j] += LAMBDA * uu
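The rename matters because `Xty` is the right-hand side of the regularized normal equations that the example's `update` function builds up. A minimal NumPy sketch of that computation, assuming (as the example does) module-level `LAMBDA` and user-count `uu` constants, and using ndarrays with `@` where als.py uses `np.matrix` and `*`:

```python
import numpy as np

LAMBDA = 0.01   # regularization constant; als.py defines a similar module-level value
uu = 100        # number of users; a stand-in for the example's global

def update(i, mat, ratings):
    # One ALS half-step for row i: solve the ridge-regularized normal
    # equations (XtX + lambda*uu*I) w = Xty.
    ff = mat.shape[1]
    XtX = mat.T @ mat                # ff x ff Gram matrix
    Xty = mat.T @ ratings[i, :]      # right-hand side -- the vector the typo renamed
    XtX += np.eye(ff) * LAMBDA * uu  # ridge term on the diagonal
    return np.linalg.solve(XtX, Xty)

M, U, F = 5, 100, 3
w = update(0, np.random.rand(U, F), np.random.rand(M, U))
print(w.shape)  # (3,)
```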
git commit: [SPARK-2950] Add gc time and shuffle write time to JobLogger
Repository: spark
Updated Branches:
  refs/heads/master 3570119c3 -> 1d03a26a4


[SPARK-2950] Add gc time and shuffle write time to JobLogger

The JobLogger is very useful for performing offline performance profiling of Spark jobs. GC time and shuffle write time are available in TaskMetrics but are currently missing from the JobLogger output. This patch adds these two fields.

~~Since this is a small change, I didn't create a JIRA. Let me know if I should do that.~~

cc kayousterhout

Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>

Closes #1869 from shivaram/job-logger and squashes the following commits:

1b709fc [Shivaram Venkataraman] Add a space before GC_TIME
c418105 [Shivaram Venkataraman] Add gc time and shuffle write time to JobLogger


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1d03a26a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1d03a26a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1d03a26a

Branch: refs/heads/master
Commit: 1d03a26a4895c24ebfab1a3cf6656af75cb53003
Parents: 3570119
Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Authored: Sun Aug 10 12:44:17 2014 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Sun Aug 10 12:44:17 2014 -0700

----------------------------------------------------------------------
 core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/1d03a26a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala
index 47dd112..4d6b5c8 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala
@@ -162,6 +162,7 @@ class JobLogger(val user: String, val logDirName: String) extends SparkListener
       " START_TIME=" + taskInfo.launchTime + " FINISH_TIME=" + taskInfo.finishTime +
       " EXECUTOR_ID=" + taskInfo.executorId + " HOST=" + taskMetrics.hostname
     val executorRunTime = " EXECUTOR_RUN_TIME=" + taskMetrics.executorRunTime
+    val gcTime = " GC_TIME=" + taskMetrics.jvmGCTime
     val inputMetrics = taskMetrics.inputMetrics match {
       case Some(metrics) =>
         " READ_METHOD=" + metrics.readMethod.toString +
@@ -179,11 +180,13 @@ class JobLogger(val user: String, val logDirName: String) extends SparkListener
       case None => ""
     }
     val writeMetrics = taskMetrics.shuffleWriteMetrics match {
-      case Some(metrics) => " SHUFFLE_BYTES_WRITTEN=" + metrics.shuffleBytesWritten
+      case Some(metrics) =>
+        " SHUFFLE_BYTES_WRITTEN=" + metrics.shuffleBytesWritten +
+        " SHUFFLE_WRITE_TIME=" + metrics.shuffleWriteTime
       case None => ""
     }
-    stageLogInfo(stageId, status + info + executorRunTime + inputMetrics + shuffleReadMetrics +
-      writeMetrics)
+    stageLogInfo(stageId, status + info + executorRunTime + gcTime + inputMetrics +
+      shuffleReadMetrics + writeMetrics)
   }

   /**
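Since the stated use case is offline profiling, a sketch of how a script might consume these lines. The KEY=value field names match the patch; the example line and its values are invented:

```python
import re

# A hypothetical JobLogger line in the KEY=value format this patch extends.
line = ("TASK_ID=42 STAGE_ID=3 EXECUTOR_RUN_TIME=700 GC_TIME=35 "
        "SHUFFLE_BYTES_WRITTEN=1048576 SHUFFLE_WRITE_TIME=120000")

# Pull every KEY=value pair into a dict, the kind of parsing an offline
# profiling script would do over the per-stage log files.
metrics = dict(re.findall(r"(\w+)=(\S+)", line))
gc_fraction = int(metrics["GC_TIME"]) / int(metrics["EXECUTOR_RUN_TIME"])
print("GC overhead: {:.1%}".format(gc_fraction))  # GC overhead: 5.0%
```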
git commit: [EC2] Factor out Mesos spark-ec2 branch
Repository: spark
Updated Branches:
  refs/heads/master 76386e1a2 -> 2aca97c7c


[EC2] Factor out Mesos spark-ec2 branch

We reference a specific branch in two places. This patch makes it one place.

Author: Nicholas Chammas <nicholas.cham...@gmail.com>

Closes #3008 from nchammas/mesos-spark-ec2-branch and squashes the following commits:

10a6089 [Nicholas Chammas] factor out mess spark-ec2 branch


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2aca97c7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2aca97c7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2aca97c7

Branch: refs/heads/master
Commit: 2aca97c7cfdefea8b6f9dbb88951e9acdfd606d9
Parents: 76386e1
Author: Nicholas Chammas <nicholas.cham...@gmail.com>
Authored: Mon Nov 3 09:02:35 2014 -0800
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Mon Nov 3 09:02:35 2014 -0800

----------------------------------------------------------------------
 ec2/spark_ec2.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2aca97c7/ec2/spark_ec2.py
----------------------------------------------------------------------
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 0d6b82b..50f88f7 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -41,8 +41,9 @@ from boto import ec2

 DEFAULT_SPARK_VERSION = "1.1.0"
+MESOS_SPARK_EC2_BRANCH = "v4"
 # A URL prefix from which to fetch AMI information
-AMI_PREFIX = "https://raw.github.com/mesos/spark-ec2/v2/ami-list"
+AMI_PREFIX = "https://raw.github.com/mesos/spark-ec2/{b}/ami-list".format(b=MESOS_SPARK_EC2_BRANCH)


 class UsageError(Exception):
@@ -583,7 +584,13 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):

     # NOTE: We should clone the repository before running deploy_files to
     # prevent ec2-variables.sh from being overwritten
-    ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git -b v4")
+    ssh(
+        host=master,
+        opts=opts,
+        command="rm -rf spark-ec2"
+        + " && "
+        + "git clone https://github.com/mesos/spark-ec2.git -b {b}".format(b=MESOS_SPARK_EC2_BRANCH)
+    )

     print "Deploying files to master..."
     deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, modules)
git commit: [SPARK-4137] [EC2] Don't change working dir on user
Repository: spark
Updated Branches:
  refs/heads/master 3d2b5bc5b -> db45f5ad0


[SPARK-4137] [EC2] Don't change working dir on user

This issue was uncovered after [this discussion](https://issues.apache.org/jira/browse/SPARK-3398?focusedCommentId=14187471&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14187471).

Don't change the working directory on the user. This breaks relative paths the user may pass in, e.g., for the SSH identity file.

```
./ec2/spark-ec2 -i ../my.pem
```

This patch will preserve the user's current working directory and allow calls like the one above to work.

Author: Nicholas Chammas <nicholas.cham...@gmail.com>

Closes #2988 from nchammas/spark-ec2-cwd and squashes the following commits:

f3850b5 [Nicholas Chammas] pep8 fix
fbc20c7 [Nicholas Chammas] revert to old commenting style
752f958 [Nicholas Chammas] specify deploy.generic path absolutely
bcdf6a5 [Nicholas Chammas] fix typo
77871a2 [Nicholas Chammas] add clarifying comment
ce071fc [Nicholas Chammas] don't change working dir


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/db45f5ad
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/db45f5ad
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/db45f5ad

Branch: refs/heads/master
Commit: db45f5ad0368760dbeaa618a04f66ae9b2bed656
Parents: 3d2b5bc
Author: Nicholas Chammas <nicholas.cham...@gmail.com>
Authored: Wed Nov 5 20:45:35 2014 -0800
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Wed Nov 5 20:45:35 2014 -0800

----------------------------------------------------------------------
 ec2/spark-ec2    |  8 ++++++--
 ec2/spark_ec2.py | 12 +++++++++++-
 2 files changed, 17 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/db45f5ad/ec2/spark-ec2
----------------------------------------------------------------------
diff --git a/ec2/spark-ec2 b/ec2/spark-ec2
index 31f9771..4aa9082 100755
--- a/ec2/spark-ec2
+++ b/ec2/spark-ec2
@@ -18,5 +18,9 @@
 # limitations under the License.
 #

-cd `dirname $0`
-PYTHONPATH="./third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" python ./spark_ec2.py "$@"
+# Preserve the user's CWD so that relative paths are passed correctly to
+#+ the underlying Python script.
+SPARK_EC2_DIR="$(dirname $0)"
+
+PYTHONPATH="${SPARK_EC2_DIR}/third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" \
+    python "${SPARK_EC2_DIR}/spark_ec2.py" "$@"

http://git-wip-us.apache.org/repos/asf/spark/blob/db45f5ad/ec2/spark_ec2.py
----------------------------------------------------------------------
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 50f88f7..a5396c2 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -40,6 +40,7 @@ from boto.ec2.blockdevicemapping import BlockDeviceMapping, BlockDeviceType, EBS
 from boto import ec2

 DEFAULT_SPARK_VERSION = "1.1.0"
+SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__))

 MESOS_SPARK_EC2_BRANCH = "v4"
 # A URL prefix from which to fetch AMI information
@@ -593,7 +594,14 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
     )

     print "Deploying files to master..."
-    deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, modules)
+    deploy_files(
+        conn=conn,
+        root_dir=SPARK_EC2_DIR + "/" + "deploy.generic",
+        opts=opts,
+        master_nodes=master_nodes,
+        slave_nodes=slave_nodes,
+        modules=modules
+    )

     print "Running setup on master..."
     setup_spark_cluster(master, opts)
@@ -730,6 +738,8 @@ def get_num_disks(instance_type):
 # cluster (e.g. lists of masters and slaves). Files are only deployed to
 # the first master instance in the cluster, and we expect the setup
 # script to be run on that instance to copy them to other nodes.
+#
+# root_dir should be an absolute path to the directory with the files we want to deploy.
 def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
     active_master = master_nodes[0].public_dns_name
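The bug class being fixed is easy to reproduce in a few lines. A minimal sketch (names are illustrative, not spark-ec2's API): once a launcher chdir()s into its own directory, relative arguments like `-i ../my.pem` stop resolving from where the user actually ran the command, so user paths must be resolved against the saved CWD:

```python
import os

def resolve_user_path(path, user_cwd):
    # Resolve a user-supplied path against the directory the user launched
    # from, not against whatever the script's working directory is now.
    return os.path.normpath(os.path.join(user_cwd, path))

user_cwd = os.getcwd()  # capture before any chdir happens
os.chdir("/tmp")        # simulate the old `cd \`dirname $0\`` behavior
print(resolve_user_path("../my.pem", user_cwd))  # still correct
```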
git commit: [SPARK-4137] [EC2] Don't change working dir on user
Repository: spark
Updated Branches:
  refs/heads/branch-1.2 7e0da9f6b -> 70f6f36e0


[SPARK-4137] [EC2] Don't change working dir on user

This issue was uncovered after [this discussion](https://issues.apache.org/jira/browse/SPARK-3398?focusedCommentId=14187471&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14187471).

Don't change the working directory on the user. This breaks relative paths the user may pass in, e.g., for the SSH identity file.

```
./ec2/spark-ec2 -i ../my.pem
```

This patch will preserve the user's current working directory and allow calls like the one above to work.

Author: Nicholas Chammas <nicholas.cham...@gmail.com>

Closes #2988 from nchammas/spark-ec2-cwd and squashes the following commits:

f3850b5 [Nicholas Chammas] pep8 fix
fbc20c7 [Nicholas Chammas] revert to old commenting style
752f958 [Nicholas Chammas] specify deploy.generic path absolutely
bcdf6a5 [Nicholas Chammas] fix typo
77871a2 [Nicholas Chammas] add clarifying comment
ce071fc [Nicholas Chammas] don't change working dir

(cherry picked from commit db45f5ad0368760dbeaa618a04f66ae9b2bed656)
Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/70f6f36e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/70f6f36e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/70f6f36e

Branch: refs/heads/branch-1.2
Commit: 70f6f36e03f97847cd2f3e4fe2902bb8459ca6a3
Parents: 7e0da9f
Author: Nicholas Chammas <nicholas.cham...@gmail.com>
Authored: Wed Nov 5 20:45:35 2014 -0800
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Wed Nov 5 20:45:55 2014 -0800

----------------------------------------------------------------------
 ec2/spark-ec2    |  8 ++++++--
 ec2/spark_ec2.py | 12 +++++++++++-
 2 files changed, 17 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/70f6f36e/ec2/spark-ec2
----------------------------------------------------------------------
diff --git a/ec2/spark-ec2 b/ec2/spark-ec2
index 31f9771..4aa9082 100755
--- a/ec2/spark-ec2
+++ b/ec2/spark-ec2
@@ -18,5 +18,9 @@
 # limitations under the License.
 #

-cd `dirname $0`
-PYTHONPATH="./third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" python ./spark_ec2.py "$@"
+# Preserve the user's CWD so that relative paths are passed correctly to
+#+ the underlying Python script.
+SPARK_EC2_DIR="$(dirname $0)"
+
+PYTHONPATH="${SPARK_EC2_DIR}/third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" \
+    python "${SPARK_EC2_DIR}/spark_ec2.py" "$@"

http://git-wip-us.apache.org/repos/asf/spark/blob/70f6f36e/ec2/spark_ec2.py
----------------------------------------------------------------------
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 50f88f7..a5396c2 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -40,6 +40,7 @@ from boto.ec2.blockdevicemapping import BlockDeviceMapping, BlockDeviceType, EBS
 from boto import ec2

 DEFAULT_SPARK_VERSION = "1.1.0"
+SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__))

 MESOS_SPARK_EC2_BRANCH = "v4"
 # A URL prefix from which to fetch AMI information
@@ -593,7 +594,14 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
     )

     print "Deploying files to master..."
-    deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, modules)
+    deploy_files(
+        conn=conn,
+        root_dir=SPARK_EC2_DIR + "/" + "deploy.generic",
+        opts=opts,
+        master_nodes=master_nodes,
+        slave_nodes=slave_nodes,
+        modules=modules
+    )

     print "Running setup on master..."
     setup_spark_cluster(master, opts)
@@ -730,6 +738,8 @@ def get_num_disks(instance_type):
 # cluster (e.g. lists of masters and slaves). Files are only deployed to
 # the first master instance in the cluster, and we expect the setup
 # script to be run on that instance to copy them to other nodes.
+#
+# root_dir should be an absolute path to the directory with the files we want to deploy.
 def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
     active_master = master_nodes[0].public_dns_name
spark git commit: [Minor][SparkR] Minor refactor and removes redundancy related to cleanClosure.
Repository: spark
Updated Branches:
  refs/heads/master b45059d0d -> 0ba3fdd59


[Minor][SparkR] Minor refactor and removes redundancy related to cleanClosure.

1. Only use `cleanClosure` in creation of RRDDs. Normally, users and developers do not need to call `cleanClosure` in their function definitions.
2. Removes redundant code (e.g. unnecessary wrapper functions) related to `cleanClosure`.

Author: hlin09 <hlin0...@gmail.com>

Closes #5495 from hlin09/cleanClosureFix and squashes the following commits:

74ec303 [hlin09] Minor refactor and removes redundancy.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0ba3fdd5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0ba3fdd5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0ba3fdd5

Branch: refs/heads/master
Commit: 0ba3fdd5992cf09bd38303ebff34d2ed19e5e09b
Parents: b45059d
Author: hlin09 <hlin0...@gmail.com>
Authored: Mon Apr 13 20:43:24 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Mon Apr 13 20:43:24 2015 -0700

----------------------------------------------------------------------
 R/pkg/R/RDD.R     | 16 ++++------------
 R/pkg/R/pairRDD.R |  4 ----
 2 files changed, 4 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0ba3fdd5/R/pkg/R/RDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index d6a7500..820027e 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -85,7 +85,7 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)

   if (!inherits(prev, "PipelinedRDD") || !isPipelinable(prev)) {
     # This transformation is the first in its stage:
-    .Object@func <- func
+    .Object@func <- cleanClosure(func)
     .Object@prev_jrdd <- getJRDD(prev)
     .Object@env$prev_serializedMode <- prev@env$serializedMode
     # NOTE: We use prev_serializedMode to track the serialization mode of prev_JRDD
@@ -94,7 +94,7 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)
     pipelinedFunc <- function(split, iterator) {
       func(split, prev@func(split, iterator))
     }
-    .Object@func <- pipelinedFunc
+    .Object@func <- cleanClosure(pipelinedFunc)
     .Object@prev_jrdd <- prev@prev_jrdd # maintain the pipeline
     # Get the serialization mode of the parent RDD
     .Object@env$prev_serializedMode <- prev@env$prev_serializedMode
@@ -144,17 +144,13 @@ setMethod("getJRDD", signature(rdd = "PipelinedRDD"),
       return(rdd@env$jrdd_val)
     }

-    computeFunc <- function(split, part) {
-      rdd@func(split, part)
-    }
-
     packageNamesArr <- serialize(.sparkREnv[[".packages"]],
                                  connection = NULL)
     broadcastArr <- lapply(ls(.broadcastNames),
                            function(name) { get(name, .broadcastNames) })

-    serializedFuncArr <- serialize(computeFunc, connection = NULL)
+    serializedFuncArr <- serialize(rdd@func, connection = NULL)

     prev_jrdd <- rdd@prev_jrdd

@@ -551,11 +547,7 @@ setMethod("mapPartitions",
 setMethod("lapplyPartitionsWithIndex",
           signature(X = "RDD", FUN = "function"),
           function(X, FUN) {
-            FUN <- cleanClosure(FUN)
-            closureCapturingFunc <- function(split, part) {
-              FUN(split, part)
-            }
-            PipelinedRDD(X, closureCapturingFunc)
+            PipelinedRDD(X, FUN)
           })

 #' @rdname lapplyPartitionsWithIndex

http://git-wip-us.apache.org/repos/asf/spark/blob/0ba3fdd5/R/pkg/R/pairRDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R
index c2396c3..739d399 100644
--- a/R/pkg/R/pairRDD.R
+++ b/R/pkg/R/pairRDD.R
@@ -694,10 +694,6 @@ setMethod("cogroup",
             for (i in 1:rddsLen) {
               rdds[[i]] <- lapply(rdds[[i]],
                                   function(x) { list(x[[1]], list(i, x[[2]])) })
-              # TODO(hao): As issue [SparkR-142] mentions, the right value of i
-              # will not be captured into UDF if getJRDD is not invoked.
-              # It should be resolved together with that issue.
-              getJRDD(rdds[[i]])  # Capture the closure.
             }
             union.rdd <- Reduce(unionRDD, rdds)
             group.func <- function(vlist) {
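For readers unfamiliar with closure cleaning: before a user function can be serialized and shipped to workers, the runtime has to capture the enclosing values it references, and only those. A toy Python analogue of what such a cleaner inspects (an illustration, not SparkR's actual algorithm):

```python
def free_names(func):
    # The enclosing and global names the function body references are the
    # only values worth capturing into the serialized payload.
    code = func.__code__
    return set(code.co_freevars) | {n for n in code.co_names
                                    if n in func.__globals__}

LAMBDA = 0.01

def penalize(x):
    return x + LAMBDA  # references the module-level LAMBDA

print(free_names(penalize))  # {'LAMBDA'}
```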
spark git commit: [SPARK-6818] [SPARKR] Support column deletion in SparkR DataFrame API.
Repository: spark
Updated Branches:
  refs/heads/master 6220d933e -> 73db132bf


[SPARK-6818] [SPARKR] Support column deletion in SparkR DataFrame API.

Author: Sun Rui <rui@intel.com>

Closes #5655 from sun-rui/SPARK-6818 and squashes the following commits:

7c66570 [Sun Rui] [SPARK-6818][SPARKR] Support column deletion in SparkR DataFrame API.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/73db132b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/73db132b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/73db132b

Branch: refs/heads/master
Commit: 73db132bf503341c7a5cf9409351c282a8464175
Parents: 6220d93
Author: Sun Rui <rui@intel.com>
Authored: Thu Apr 23 16:08:14 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Thu Apr 23 16:08:14 2015 -0700

----------------------------------------------------------------------
 R/pkg/R/DataFrame.R              | 8 +++++++-
 R/pkg/inst/tests/test_sparkSQL.R | 5 +++++
 2 files changed, 12 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/73db132b/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 861fe1c..b59b700 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -790,9 +790,12 @@ setMethod("$", signature(x = "DataFrame"),

 setMethod("$<-", signature(x = "DataFrame"),
           function(x, name, value) {
-            stopifnot(class(value) == "Column")
+            stopifnot(class(value) == "Column" || is.null(value))
             cols <- columns(x)
             if (name %in% cols) {
+              if (is.null(value)) {
+                cols <- Filter(function(c) { c != name }, cols)
+              }
               cols <- lapply(cols, function(c) {
                 if (c == name) {
                   alias(value, name)
@@ -802,6 +805,9 @@ setMethod("$<-", signature(x = "DataFrame"),
               })
               nx <- select(x, cols)
             } else {
+              if (is.null(value)) {
+                return(x)
+              }
               nx <- withColumn(x, name, value)
             }
             x@sdf <- nx@sdf

http://git-wip-us.apache.org/repos/asf/spark/blob/73db132b/R/pkg/inst/tests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 25831ae..af7a6c5 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -449,6 +449,11 @@ test_that("select operators", {
   df$age2 <- df$age * 2
   expect_equal(columns(df), c("name", "age", "age2"))
   expect_equal(count(where(df, df$age2 == df$age * 2)), 2)
+
+  df$age2 <- NULL
+  expect_equal(columns(df), c("name", "age"))
+  df$age3 <- NULL
+  expect_equal(columns(df), c("name", "age"))
 })

 test_that("select with column", {
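The deletion strategy is simple: rebuild the column list without the dropped name, then re-select. A plain-Python sketch of just that list manipulation (the DataFrame machinery is elided):

```python
def drop_column(cols, name):
    # Deleting a column that does not exist is a no-op, matching the new
    # df$age3 <- NULL test above.
    return [c for c in cols if c != name]

cols = ["name", "age", "age2"]
cols = drop_column(cols, "age2")  # ['name', 'age']
cols = drop_column(cols, "age3")  # unchanged: ['name', 'age']
print(cols)
```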
spark git commit: [SPARK-7033] [SPARKR] Clean usage of split. Use partition instead where applicable.
Repository: spark
Updated Branches:
  refs/heads/master 6e57d57b3 -> ebb77b2af


[SPARK-7033] [SPARKR] Clean usage of split. Use partition instead where applicable.

Author: Sun Rui <rui@intel.com>

Closes #5628 from sun-rui/SPARK-7033 and squashes the following commits:

046bc9e [Sun Rui] Clean split usage in tests.
d531c86 [Sun Rui] [SPARK-7033][SPARKR] Clean usage of split. Use partition instead where applicable.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ebb77b2a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ebb77b2a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ebb77b2a

Branch: refs/heads/master
Commit: ebb77b2aff085e71906b5de9d266ded89051af82
Parents: 6e57d57
Author: Sun Rui <rui@intel.com>
Authored: Fri Apr 24 11:00:19 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Fri Apr 24 11:00:19 2015 -0700

----------------------------------------------------------------------
 R/pkg/R/RDD.R               | 36 ++++++++++++++++++------------------
 R/pkg/R/context.R           | 20 ++++++++++----------
 R/pkg/R/pairRDD.R           |  8 ++++----
 R/pkg/R/utils.R             |  2 +-
 R/pkg/inst/tests/test_rdd.R | 12 ++++++------
 5 files changed, 39 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ebb77b2a/R/pkg/R/RDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index 1284313..cc09efb 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -91,8 +91,8 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)
   # NOTE: We use prev_serializedMode to track the serialization mode of prev_JRDD
   # prev_serializedMode is used during the delayed computation of JRDD in getJRDD
 } else {
-  pipelinedFunc <- function(split, iterator) {
-    func(split, prev@func(split, iterator))
+  pipelinedFunc <- function(partIndex, part) {
+    func(partIndex, prev@func(partIndex, part))
   }
   .Object@func <- cleanClosure(pipelinedFunc)
   .Object@prev_jrdd <- prev@prev_jrdd # maintain the pipeline
@@ -306,7 +306,7 @@ setMethod("numPartitions",
           signature(x = "RDD"),
           function(x) {
             jrdd <- getJRDD(x)
-            partitions <- callJMethod(jrdd, "splits")
+            partitions <- callJMethod(jrdd, "partitions")
             callJMethod(partitions, "size")
           })

@@ -452,8 +452,8 @@ setMethod("countByValue",
 setMethod("lapply",
           signature(X = "RDD", FUN = "function"),
           function(X, FUN) {
-            func <- function(split, iterator) {
-              lapply(iterator, FUN)
+            func <- function(partIndex, part) {
+              lapply(part, FUN)
             }
             lapplyPartitionsWithIndex(X, func)
           })

@@ -538,8 +538,8 @@ setMethod("mapPartitions",
 #'\dontrun{
 #' sc <- sparkR.init()
 #' rdd <- parallelize(sc, 1:10, 5L)
-#' prod <- lapplyPartitionsWithIndex(rdd, function(split, part) {
-#'                                          split * Reduce("+", part) })
+#' prod <- lapplyPartitionsWithIndex(rdd, function(partIndex, part) {
+#'                                          partIndex * Reduce("+", part) })
 #' collect(prod, flatten = FALSE) # 0, 7, 22, 45, 76
 #'}
 #' @rdname lapplyPartitionsWithIndex
@@ -813,7 +813,7 @@ setMethod("distinct",
 #' @examples
 #'\dontrun{
 #' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10) # ensure each num is in its own split
+#' rdd <- parallelize(sc, 1:10)
 #' collect(sampleRDD(rdd, FALSE, 0.5, 1618L)) # ~5 distinct elements
 #' collect(sampleRDD(rdd, TRUE, 0.5, 9L)) # ~5 elements possibly with duplicates
 #'}
@@ -825,14 +825,14 @@ setMethod("sampleRDD",
           function(x, withReplacement, fraction, seed) {

             # The sampler: takes a partition and returns its sampled version.
-            samplingFunc <- function(split, part) {
+            samplingFunc <- function(partIndex, part) {
               set.seed(seed)
               res <- vector("list", length(part))
               len <- 0

               # Discards some random values to ensure each partition has a
               # different random seed.
-              runif(split)
+              runif(partIndex)

               for (elem in part) {
                 if (withReplacement) {
@@ -989,8 +989,8 @@ setMethod("coalesce",
           function(x, numPartitions, shuffle = FALSE) {
             numPartitions <- numToInt(numPartitions)
             if (shuffle || numPartitions > SparkR::numPartitions(x)) {
-              func <- function(s, part) {
-                set.seed(s)  # split as seed
+              func <- function(partIndex, part) {
+                set.seed(partIndex)  # partIndex as seed
                 start <- as.integer(sample(numPartitions, 1) - 1)
                 lapply(seq_along(part),
                        function(i
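The sampler's trick is worth spelling out: there is one global seed, but each partition must draw from a distinct random stream, which the R code achieves by seeding with `seed` and then discarding `partIndex` draws via `runif(partIndex)`. A Python sketch of the same goal, folding the partition index into the seed directly instead:

```python
import random

def sample_partition(part_index, part, fraction, seed):
    # One global seed, a distinct deterministic stream per partition.
    rng = random.Random(seed * 100003 + part_index)
    return [x for x in part if rng.random() < fraction]

parts = [list(range(1, 6)), list(range(6, 11))]
print([sample_partition(i, p, 0.5, seed=42) for i, p in enumerate(parts)])
```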
spark git commit: [SPARK-6852] [SPARKR] Accept numeric as numPartitions in SparkR.
Repository: spark
Updated Branches:
  refs/heads/master ebb77b2af -> caf0136ec


[SPARK-6852] [SPARKR] Accept numeric as numPartitions in SparkR.

Author: Sun Rui <rui@intel.com>

Closes #5613 from sun-rui/SPARK-6852 and squashes the following commits:

abaf02e [Sun Rui] Change the type of default numPartitions from integer to numeric in generics.R.
29d67c1 [Sun Rui] [SPARK-6852][SPARKR] Accept numeric as numPartitions in SparkR.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/caf0136e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/caf0136e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/caf0136e

Branch: refs/heads/master
Commit: caf0136ec5838cf5bf61f39a5b3474a505a6ae11
Parents: ebb77b2
Author: Sun Rui <rui@intel.com>
Authored: Fri Apr 24 12:52:07 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Fri Apr 24 12:52:07 2015 -0700

----------------------------------------------------------------------
 R/pkg/R/RDD.R      |  2 +-
 R/pkg/R/generics.R | 12 ++++++------
 R/pkg/R/pairRDD.R  | 24 ++++++++++++------------
 3 files changed, 19 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/caf0136e/R/pkg/R/RDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index cc09efb..1662d6b 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -967,7 +967,7 @@ setMethod("keyBy",
 setMethod("repartition",
           signature(x = "RDD", numPartitions = "numeric"),
           function(x, numPartitions) {
-            coalesce(x, numToInt(numPartitions), TRUE)
+            coalesce(x, numPartitions, TRUE)
           })

 #' Return a new RDD that is reduced into numPartitions partitions.

http://git-wip-us.apache.org/repos/asf/spark/blob/caf0136e/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 6c62333..34dbe84 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -60,7 +60,7 @@ setGeneric("countByValue", function(x) { standardGeneric("countByValue") })

 #' @rdname distinct
 #' @export
-setGeneric("distinct", function(x, numPartitions = 1L) { standardGeneric("distinct") })
+setGeneric("distinct", function(x, numPartitions = 1) { standardGeneric("distinct") })

 #' @rdname filterRDD
 #' @export
@@ -182,7 +182,7 @@ setGeneric("setName", function(x, name) { standardGeneric("setName") })
 #' @rdname sortBy
 #' @export
 setGeneric("sortBy",
-           function(x, func, ascending = TRUE, numPartitions = 1L) {
+           function(x, func, ascending = TRUE, numPartitions = 1) {
              standardGeneric("sortBy")
            })

@@ -244,7 +244,7 @@ setGeneric("flatMapValues", function(X, FUN) { standardGeneric("flatMapValues") })

 #' @rdname intersection
 #' @export
-setGeneric("intersection", function(x, other, numPartitions = 1L) {
+setGeneric("intersection", function(x, other, numPartitions = 1) {
   standardGeneric("intersection") })

 #' @rdname keys
@@ -346,21 +346,21 @@ setGeneric("rightOuterJoin", function(x, y, numPartitions) { standardGeneric("rightOuterJoin") })
 #' @rdname sortByKey
 #' @export
 setGeneric("sortByKey",
-           function(x, ascending = TRUE, numPartitions = 1L) {
+           function(x, ascending = TRUE, numPartitions = 1) {
              standardGeneric("sortByKey")
            })

 #' @rdname subtract
 #' @export
 setGeneric("subtract",
-           function(x, other, numPartitions = 1L) {
+           function(x, other, numPartitions = 1) {
              standardGeneric("subtract")
            })

 #' @rdname subtractByKey
 #' @export
 setGeneric("subtractByKey",
-           function(x, other, numPartitions = 1L) {
+           function(x, other, numPartitions = 1) {
              standardGeneric("subtractByKey")
            })

http://git-wip-us.apache.org/repos/asf/spark/blob/caf0136e/R/pkg/R/pairRDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R
index f99b474..9791e55 100644
--- a/R/pkg/R/pairRDD.R
+++ b/R/pkg/R/pairRDD.R
@@ -190,7 +190,7 @@ setMethod("flatMapValues",
 #' @rdname partitionBy
 #' @aliases partitionBy,RDD,integer-method
 setMethod("partitionBy",
-          signature(x = "RDD", numPartitions = "integer"),
+          signature(x = "RDD", numPartitions = "numeric"),
           function(x, numPartitions, partitionFunc = hashCode) {

             #if (missing(partitionFunc)) {
@@ -211,7 +211,7 @@ setMethod("partitionBy",
             # the content (key-val pairs).
             pairwiseRRDD <- newJObject("org.apache.spark.api.r.PairwiseRRDD",
                                        callJMethod(jrdd, "rdd"),
-                                       as.integer(numPartitions),
+                                       numToInt(numPartitions
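The change hinges on coercing a numeric like `2` (an R double) where an integer count is expected, via SparkR's `numToInt` helper in utils.R. A Python sketch of that coercion idea (the warning behavior here is an assumption for illustration):

```python
def num_to_int(x):
    # Accept any numeric where an integer partition count is expected,
    # truncating (with a note) when the value isn't integral.
    if not float(x).is_integer():
        print("Coercing %s to integer" % x)
    return int(x)

print(num_to_int(2.0))  # 2, silently
print(num_to_int(2.7))  # notes the truncation, then 2
```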
spark git commit: [SPARK-6856] [R] Make RDD information more useful in SparkR
Repository: spark
Updated Branches:
  refs/heads/master 998aac21f -> 7078f6028


[SPARK-6856] [R] Make RDD information more useful in SparkR

Author: Jeff Harrison <jeffrharri...@gmail.com>

Closes #5667 from His-name-is-Joof/joofspark and squashes the following commits:

f8814a6 [Jeff Harrison] newline added after RDD show() output
4d9d972 [Jeff Harrison] Merge branch 'master' into joofspark
9d2295e [Jeff Harrison] parallelize with 1:10
878b830 [Jeff Harrison] Merge branch 'master' into joofspark
c8c0b80 [Jeff Harrison] add test for RDD function show()
123be65 [Jeff Harrison] SPARK-6856


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7078f602
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7078f602
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7078f602

Branch: refs/heads/master
Commit: 7078f6028bf012235c664b02ec3541cbb0a248a7
Parents: 998aac2
Author: Jeff Harrison <jeffrharri...@gmail.com>
Authored: Mon Apr 27 13:38:25 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Mon Apr 27 13:38:25 2015 -0700

----------------------------------------------------------------------
 R/pkg/R/RDD.R               | 5 +++++
 R/pkg/inst/tests/test_rdd.R | 5 +++++
 2 files changed, 10 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/7078f602/R/pkg/R/RDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index 1662d6b..f90c26b 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -66,6 +66,11 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode,
   .Object
 })

+setMethod("show", "RDD",
+          function(.Object) {
+             cat(paste(callJMethod(.Object@jrdd, "toString"), "\n", sep=""))
+          })
+
 setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val) {
   .Object@env <- new.env()
   .Object@env$isCached <- FALSE

http://git-wip-us.apache.org/repos/asf/spark/blob/7078f602/R/pkg/inst/tests/test_rdd.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_rdd.R b/R/pkg/inst/tests/test_rdd.R
index d55af93..0320735 100644
--- a/R/pkg/inst/tests/test_rdd.R
+++ b/R/pkg/inst/tests/test_rdd.R
@@ -759,6 +759,11 @@ test_that("collectAsMap() on a pairwise RDD", {
   expect_equal(vals, list(`1` = "a", `2` = "b"))
 })

+test_that("show()", {
+  rdd <- parallelize(sc, list(1:10))
+  expect_output(show(rdd), "ParallelCollectionRDD\\[\\d+\\] at parallelize at RRDD\\.scala:\\d+")
+})
+
 test_that("sampleByKey() on pairwise RDDs", {
   rdd <- parallelize(sc, 1:2000)
   pairsRDD <- lapply(rdd, function(x) { if (x %% 2 == 0) list("a", x) else list("b", x) })
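The design choice here is delegation: rather than describing the R-side wrapper, `show` prints the backing JVM object's own `toString`, which carries the RDD lineage information. The same idea in Python terms (`jrdd` stands in for the JVM-side reference; an illustration, not SparkR's code):

```python
class RDD:
    def __init__(self, jrdd):
        self._jrdd = jrdd

    def __repr__(self):
        # Delegate display to the backing engine's own description
        # instead of printing a bare class name.
        return str(self._jrdd)

print(RDD("ParallelCollectionRDD[13] at parallelize at RRDD.scala:352"))
```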
spark git commit: [SPARK-6991] [SPARKR] Adds support for zipPartitions.
Repository: spark
Updated Branches:
  refs/heads/master ef82bddc1 -> ca9f4ebb8


[SPARK-6991] [SPARKR] Adds support for zipPartitions.

Author: hlin09 <hlin0...@gmail.com>

Closes #5568 from hlin09/zipPartitions and squashes the following commits:

12c08a5 [hlin09] Fix comments
d2d32db [hlin09] Merge branch 'master' into zipPartitions
ec56d2f [hlin09] Fix test.
27655d3 [hlin09] Adds support for zipPartitions.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca9f4ebb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca9f4ebb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca9f4ebb

Branch: refs/heads/master
Commit: ca9f4ebb8e510e521bf4df0331375ddb385fb9d2
Parents: ef82bdd
Author: hlin09 <hlin0...@gmail.com>
Authored: Mon Apr 27 15:04:37 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Mon Apr 27 15:04:37 2015 -0700

----------------------------------------------------------------------
 R/pkg/NAMESPACE                         |  1 +
 R/pkg/R/RDD.R                           | 46 ++++++++++++++++++++++++++++
 R/pkg/R/generics.R                      |  5 +++
 R/pkg/inst/tests/test_binary_function.R | 33 +++++++++++++++++++
 4 files changed, 85 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ca9f4ebb/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 8028364..e077eac 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -71,6 +71,7 @@ exportMethods(
               "unpersist",
               "value",
               "values",
+              "zipPartitions",
               "zipRDD",
               "zipWithIndex",
               "zipWithUniqueId"

http://git-wip-us.apache.org/repos/asf/spark/blob/ca9f4ebb/R/pkg/R/RDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index f90c26b..a3a0421 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -1595,3 +1595,49 @@ setMethod("intersection",
             keys(filterRDD(cogroup(rdd1, rdd2, numPartitions = numPartitions), filterFunction))
           })
+
+#' Zips an RDD's partitions with one (or more) RDD(s).
+#' Same as zipPartitions in Spark.
+#'
+#' @param ... RDDs to be zipped.
+#' @param func A function to transform zipped partitions.
+#' @return A new RDD by applying a function to the zipped partitions.
+#'         Assumes that all the RDDs have the *same number of partitions*, but
+#'         does *not* require them to have the same number of elements in each partition.
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' rdd1 <- parallelize(sc, 1:2, 2L)  # 1, 2
+#' rdd2 <- parallelize(sc, 1:4, 2L)  # 1:2, 3:4
+#' rdd3 <- parallelize(sc, 1:6, 2L)  # 1:3, 4:6
+#' collect(zipPartitions(rdd1, rdd2, rdd3,
+#'                       func = function(x, y, z) { list(list(x, y, z))} ))
+#' # list(list(1, c(1,2), c(1,2,3)), list(2, c(3,4), c(4,5,6)))
+#'}
+#' @rdname zipRDD
+#' @aliases zipPartitions,RDD
+setMethod("zipPartitions",
+          "RDD",
+          function(..., func) {
+            rrdds <- list(...)
+            if (length(rrdds) == 1) {
+              return(rrdds[[1]])
+            }
+            nPart <- sapply(rrdds, numPartitions)
+            if (length(unique(nPart)) != 1) {
+              stop("Can only zipPartitions RDDs which have the same number of partitions.")
+            }
+
+            rrdds <- lapply(rrdds, function(rdd) {
+              mapPartitionsWithIndex(rdd, function(partIndex, part) {
+                print(length(part))
+                list(list(partIndex, part))
+              })
+            })
+            union.rdd <- Reduce(unionRDD, rrdds)
+            zipped.rdd <- values(groupByKey(union.rdd, numPartitions = nPart[1]))
+            res <- mapPartitions(zipped.rdd, function(plist) {
+              do.call(func, plist[[1]])
+            })
+            res
+          })

http://git-wip-us.apache.org/repos/asf/spark/blob/ca9f4ebb/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 34dbe84..e887293 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -217,6 +217,11 @@ setGeneric("unpersist", function(x, ...) { standardGeneric("unpersist") })
 #' @export
 setGeneric("zipRDD", function(x, other) { standardGeneric("zipRDD") })

+#' @rdname zipRDD
+#' @export
+setGeneric("zipPartitions", function(..., func) { standardGeneric("zipPartitions") },
+           signature = "...")
+
 #' @rdname zipWithIndex
 #' @seealso zipWithUniqueId
 #' @export

http://git-wip-us.apache.org/repos/asf/spark/blob/ca9f4ebb/R/pkg/inst/tests/test_binary_function.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_binary_function.R b/R/pkg/inst/tests
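The implementation strategy is the interesting part: tag each partition with its index, union all the tagged RDDs, group by partition index, then apply `func` to each co-grouped set of partitions. A Python sketch of that same strategy, with lists of lists standing in for RDDs:

```python
from collections import defaultdict

def zip_partitions(func, *rdds):
    if len({len(r) for r in rdds}) != 1:
        raise ValueError("Can only zipPartitions RDDs which have the same "
                         "number of partitions.")
    grouped = defaultdict(list)
    for rdd in rdds:                      # the unionRDD step
        for idx, part in enumerate(rdd):  # the (partIndex, part) tagging
            grouped[idx].append(part)     # the groupByKey step
    return [func(*grouped[i]) for i in sorted(grouped)]

rdd1, rdd2, rdd3 = [[1], [2]], [[1, 2], [3, 4]], [[1, 2, 3], [4, 5, 6]]
print(zip_partitions(lambda x, y, z: (x, y, z), rdd1, rdd2, rdd3))
# [([1], [1, 2], [1, 2, 3]), ([2], [3, 4], [4, 5, 6])]
```

Note that partitions only need to line up in count, not in length, exactly as the roxygen docs above state.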
[2/2] spark git commit: [SPARK-6824] Fill the docs for DataFrame API in SparkR
[SPARK-6824] Fill the docs for DataFrame API in SparkR

This patch also removes the RDD docs from being built as a part of roxygen, simply by deleting the ' of the "#'" comment markers.

Author: hqzizania <qian.hu...@intel.com>
Author: qhuang <qian.hu...@intel.com>

Closes #5969 from hqzizania/R1 and squashes the following commits:

6d27696 [qhuang] fixes in NAMESPACE
eb4b095 [qhuang] remove more docs
6394579 [qhuang] remove RDD docs in generics.R
6813860 [hqzizania] Fill the docs for DataFrame API in SparkR
857220f [hqzizania] remove the pairRDD docs from being built as a part of roxygen
c045d64 [hqzizania] remove the RDD docs from being built as a part of roxygen


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/008a60dd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/008a60dd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/008a60dd

Branch: refs/heads/master
Commit: 008a60dd371e76819d8e08ab638cac7b3a48c9fc
Parents: 65afd3c
Author: hqzizania <qian.hu...@intel.com>
Authored: Fri May 8 11:25:04 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Fri May 8 11:25:04 2015 -0700

----------------------------------------------------------------------
 R/pkg/DESCRIPTION    |    2 +-
 R/pkg/NAMESPACE      |    4 -
 R/pkg/R/DataFrame.R  |   95 +-
 R/pkg/R/RDD.R        | 1546 ++---
 R/pkg/R/SQLContext.R |   64 +-
 R/pkg/R/broadcast.R  |   64 +-
 R/pkg/R/context.R    |  240 +++
 R/pkg/R/generics.R   |  318 +-
 R/pkg/R/pairRDD.R    |  886 +-
 9 files changed, 1610 insertions(+), 1609 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/008a60dd/R/pkg/DESCRIPTION
----------------------------------------------------------------------
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 1c1779a..efc85bb 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -15,11 +15,11 @@ Suggests:
 Description: R frontend for Spark
 License: Apache License (== 2.0)
 Collate:
+    'schema.R'
     'generics.R'
     'jobj.R'
     'RDD.R'
     'pairRDD.R'
-    'schema.R'
     'column.R'
     'group.R'
     'DataFrame.R'

http://git-wip-us.apache.org/repos/asf/spark/blob/008a60dd/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 3fb92be..7611f47 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -26,7 +26,6 @@ exportMethods("cache",
               "intersect",
               "isLocal",
               "join",
-              "length",
               "limit",
               "orderBy",
               "names",
@@ -101,9 +100,6 @@ export("cacheTable",
        "tables",
        "uncacheTable")

-export("sparkRSQL.init",
-       "sparkRHive.init")
-
 export("structField",
        "structField.jobj",
        "structField.character",

http://git-wip-us.apache.org/repos/asf/spark/blob/008a60dd/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 47d92f1..354642e 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -45,6 +45,9 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {

 #' @rdname DataFrame
 #' @export
+#'
+#' @param sdf A Java object reference to the backing Scala DataFrame
+#' @param isCached TRUE if the dataFrame is cached
 dataFrame <- function(sdf, isCached = FALSE) {
   new("DataFrame", sdf, isCached)
 }
@@ -244,7 +247,7 @@ setMethod("columns",
           })

 #' @rdname columns
-#' @export
+#' @aliases names,DataFrame,function-method
 setMethod("names",
           signature(x = "DataFrame"),
           function(x) {
@@ -399,23 +402,23 @@ setMethod("repartition",
             dataFrame(sdf)
           })

-#' toJSON
-#'
-#' Convert the rows of a DataFrame into JSON objects and return an RDD where
-#' each element contains a JSON string.
-#'
-#' @param x A SparkSQL DataFrame
-#' @return A StringRRDD of JSON objects
-#' @rdname tojson
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlCtx, path)
-#' newRDD <- toJSON(df)
-#'}
+# toJSON
+#
+# Convert the rows of a DataFrame into JSON objects and return an RDD where
+# each element contains a JSON string.
+#
+# @param x A SparkSQL DataFrame
+# @return A StringRRDD of JSON objects
+# @rdname tojson
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# path <- "path/to/file.json"
+# df <- jsonFile(sqlCtx, path)
+# newRDD <- toJSON(df)
+#}
 setMethod("toJSON",
           signature(x = "DataFrame"),
           function(x) {
@@ -578,8 +581,8 @@ setMethod("limit",
             dataFrame(res)
           })

-# Take the first NUM rows of a DataFrame and return a the results as a data.frame
-
+#' Take the first NUM rows of a DataFrame
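The mechanical edit behind this patch is easy to automate: demote roxygen doc comments ("#'") to plain comments ("#") so roxygen2 no longer picks them up when building the package documentation. A small Python sketch of that transformation:

```python
def demote_roxygen(line):
    # Only the roxygen marker at the start of a comment line is demoted;
    # ordinary code and trailing comments are left alone.
    stripped = line.lstrip()
    if stripped.startswith("#'"):
        return line.replace("#'", "#", 1)
    return line

print(demote_roxygen("#' @rdname aggregateRDD"))  # "# @rdname aggregateRDD"
print(demote_roxygen("x <- 1  # keep me"))        # unchanged
```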
[1/2] spark git commit: [SPARK-6824] Fill the docs for DataFrame API in SparkR
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 75fed0ca4 -> 4f01f5b56


http://git-wip-us.apache.org/repos/asf/spark/blob/4f01f5b5/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 5838955..380e8eb 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -17,353 +17,353 @@

 ############ RDD Actions and Transformations ############

-#' @rdname aggregateRDD
-#' @seealso reduce
-#' @export
+# @rdname aggregateRDD
+# @seealso reduce
+# @export
 setGeneric("aggregateRDD", function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") })

-#' @rdname cache-methods
-#' @export
+# @rdname cache-methods
+# @export
 setGeneric("cache", function(x) { standardGeneric("cache") })

-#' @rdname coalesce
-#' @seealso repartition
-#' @export
+# @rdname coalesce
+# @seealso repartition
+# @export
 setGeneric("coalesce", function(x, numPartitions, ...) { standardGeneric("coalesce") })

-#' @rdname checkpoint-methods
-#' @export
+# @rdname checkpoint-methods
+# @export
 setGeneric("checkpoint", function(x) { standardGeneric("checkpoint") })

-#' @rdname collect-methods
-#' @export
+# @rdname collect-methods
+# @export
 setGeneric("collect", function(x, ...) { standardGeneric("collect") })

-#' @rdname collect-methods
-#' @export
+# @rdname collect-methods
+# @export
 setGeneric("collectAsMap", function(x) { standardGeneric("collectAsMap") })

-#' @rdname collect-methods
-#' @export
+# @rdname collect-methods
+# @export
 setGeneric("collectPartition",
            function(x, partitionId) {
              standardGeneric("collectPartition")
            })

-#' @rdname count
-#' @export
+# @rdname count
+# @export
 setGeneric("count", function(x) { standardGeneric("count") })

-#' @rdname countByValue
-#' @export
+# @rdname countByValue
+# @export
 setGeneric("countByValue", function(x) { standardGeneric("countByValue") })

-#' @rdname distinct
-#' @export
+# @rdname distinct
+# @export
 setGeneric("distinct", function(x, numPartitions = 1) { standardGeneric("distinct") })

-#' @rdname filterRDD
-#' @export
+# @rdname filterRDD
+# @export
 setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") })

-#' @rdname first
-#' @export
+# @rdname first
+# @export
 setGeneric("first", function(x) { standardGeneric("first") })

-#' @rdname flatMap
-#' @export
+# @rdname flatMap
+# @export
 setGeneric("flatMap", function(X, FUN) { standardGeneric("flatMap") })

-#' @rdname fold
-#' @seealso reduce
-#' @export
+# @rdname fold
+# @seealso reduce
+# @export
 setGeneric("fold", function(x, zeroValue, op) { standardGeneric("fold") })

-#' @rdname foreach
-#' @export
+# @rdname foreach
+# @export
 setGeneric("foreach", function(x, func) { standardGeneric("foreach") })

-#' @rdname foreach
-#' @export
+# @rdname foreach
+# @export
 setGeneric("foreachPartition", function(x, func) { standardGeneric("foreachPartition") })

 # The jrdd accessor function.
 setGeneric("getJRDD", function(rdd, ...) { standardGeneric("getJRDD") })

-#' @rdname glom
-#' @export
+# @rdname glom
+# @export
 setGeneric("glom", function(x) { standardGeneric("glom") })

-#' @rdname keyBy
-#' @export
+# @rdname keyBy
+# @export
 setGeneric("keyBy", function(x, func) { standardGeneric("keyBy") })

-#' @rdname lapplyPartition
-#' @export
+# @rdname lapplyPartition
+# @export
 setGeneric("lapplyPartition", function(X, FUN) { standardGeneric("lapplyPartition") })

-#' @rdname lapplyPartitionsWithIndex
-#' @export
+# @rdname lapplyPartitionsWithIndex
+# @export
 setGeneric("lapplyPartitionsWithIndex",
            function(X, FUN) {
              standardGeneric("lapplyPartitionsWithIndex")
            })

-#' @rdname lapply
-#' @export
+# @rdname lapply
+# @export
 setGeneric("map", function(X, FUN) { standardGeneric("map") })

-#' @rdname lapplyPartition
-#' @export
+# @rdname lapplyPartition
+# @export
 setGeneric("mapPartitions", function(X, FUN) { standardGeneric("mapPartitions") })

-#' @rdname lapplyPartitionsWithIndex
-#' @export
+# @rdname lapplyPartitionsWithIndex
+# @export
 setGeneric("mapPartitionsWithIndex",
            function(X, FUN) {
              standardGeneric("mapPartitionsWithIndex")
            })

-#' @rdname maximum
-#' @export
+# @rdname maximum
+# @export
 setGeneric("maximum", function(x) { standardGeneric("maximum") })

-#' @rdname minimum
-#' @export
+# @rdname minimum
+# @export
 setGeneric("minimum", function(x) { standardGeneric("minimum") })

-#' @rdname sumRDD
-#' @export
+# @rdname sumRDD
+# @export
 setGeneric("sumRDD", function(x) { standardGeneric("sumRDD") })

-#' @rdname name
-#' @export
+# @rdname name
+# @export
 setGeneric("name", function(x) { standardGeneric("name") })

-#' @rdname numPartitions
-#' @export
+# @rdname numPartitions
+# @export
 setGeneric("numPartitions", function(x) { standardGeneric("numPartitions") })

-#' @rdname persist
-#' @export
+# @rdname persist
+# @export
 setGeneric("persist", function(x, newLevel) { standardGeneric("persist") })

-#' @rdname pipeRDD
-#' @export
+# @rdname
[1/2] spark git commit: [SPARK-6824] Fill the docs for DataFrame API in SparkR
Repository: spark
Updated Branches:
  refs/heads/master 65afd3ce8 -> 008a60dd3


http://git-wip-us.apache.org/repos/asf/spark/blob/008a60dd/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 5838955..380e8eb 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -17,353 +17,353 @@

 ############ RDD Actions and Transformations ############

-#' @rdname aggregateRDD
-#' @seealso reduce
-#' @export
+# @rdname aggregateRDD
+# @seealso reduce
+# @export
 setGeneric("aggregateRDD", function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") })

-#' @rdname cache-methods
-#' @export
+# @rdname cache-methods
+# @export
 setGeneric("cache", function(x) { standardGeneric("cache") })

-#' @rdname coalesce
-#' @seealso repartition
-#' @export
+# @rdname coalesce
+# @seealso repartition
+# @export
 setGeneric("coalesce", function(x, numPartitions, ...) { standardGeneric("coalesce") })

-#' @rdname checkpoint-methods
-#' @export
+# @rdname checkpoint-methods
+# @export
 setGeneric("checkpoint", function(x) { standardGeneric("checkpoint") })

-#' @rdname collect-methods
-#' @export
+# @rdname collect-methods
+# @export
 setGeneric("collect", function(x, ...) { standardGeneric("collect") })

-#' @rdname collect-methods
-#' @export
+# @rdname collect-methods
+# @export
 setGeneric("collectAsMap", function(x) { standardGeneric("collectAsMap") })

-#' @rdname collect-methods
-#' @export
+# @rdname collect-methods
+# @export
 setGeneric("collectPartition",
            function(x, partitionId) {
              standardGeneric("collectPartition")
            })

-#' @rdname count
-#' @export
+# @rdname count
+# @export
 setGeneric("count", function(x) { standardGeneric("count") })

-#' @rdname countByValue
-#' @export
+# @rdname countByValue
+# @export
 setGeneric("countByValue", function(x) { standardGeneric("countByValue") })

-#' @rdname distinct
-#' @export
+# @rdname distinct
+# @export
 setGeneric("distinct", function(x, numPartitions = 1) { standardGeneric("distinct") })

-#' @rdname filterRDD
-#' @export
+# @rdname filterRDD
+# @export
 setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") })

-#' @rdname first
-#' @export
+# @rdname first
+# @export
 setGeneric("first", function(x) { standardGeneric("first") })

-#' @rdname flatMap
-#' @export
+# @rdname flatMap
+# @export
 setGeneric("flatMap", function(X, FUN) { standardGeneric("flatMap") })

-#' @rdname fold
-#' @seealso reduce
-#' @export
+# @rdname fold
+# @seealso reduce
+# @export
 setGeneric("fold", function(x, zeroValue, op) { standardGeneric("fold") })

-#' @rdname foreach
-#' @export
+# @rdname foreach
+# @export
 setGeneric("foreach", function(x, func) { standardGeneric("foreach") })

-#' @rdname foreach
-#' @export
+# @rdname foreach
+# @export
 setGeneric("foreachPartition", function(x, func) { standardGeneric("foreachPartition") })

 # The jrdd accessor function.
 setGeneric("getJRDD", function(rdd, ...) { standardGeneric("getJRDD") })

-#' @rdname glom
-#' @export
+# @rdname glom
+# @export
 setGeneric("glom", function(x) { standardGeneric("glom") })

-#' @rdname keyBy
-#' @export
+# @rdname keyBy
+# @export
 setGeneric("keyBy", function(x, func) { standardGeneric("keyBy") })

-#' @rdname lapplyPartition
-#' @export
+# @rdname lapplyPartition
+# @export
 setGeneric("lapplyPartition", function(X, FUN) { standardGeneric("lapplyPartition") })

-#' @rdname lapplyPartitionsWithIndex
-#' @export
+# @rdname lapplyPartitionsWithIndex
+# @export
 setGeneric("lapplyPartitionsWithIndex",
            function(X, FUN) {
              standardGeneric("lapplyPartitionsWithIndex")
            })

-#' @rdname lapply
-#' @export
+# @rdname lapply
+# @export
 setGeneric("map", function(X, FUN) { standardGeneric("map") })

-#' @rdname lapplyPartition
-#' @export
+# @rdname lapplyPartition
+# @export
 setGeneric("mapPartitions", function(X, FUN) { standardGeneric("mapPartitions") })

-#' @rdname lapplyPartitionsWithIndex
-#' @export
+# @rdname lapplyPartitionsWithIndex
+# @export
 setGeneric("mapPartitionsWithIndex",
            function(X, FUN) {
              standardGeneric("mapPartitionsWithIndex")
            })

-#' @rdname maximum
-#' @export
+# @rdname maximum
+# @export
 setGeneric("maximum", function(x) { standardGeneric("maximum") })

-#' @rdname minimum
-#' @export
+# @rdname minimum
+# @export
 setGeneric("minimum", function(x) { standardGeneric("minimum") })

-#' @rdname sumRDD
-#' @export
+# @rdname sumRDD
+# @export
 setGeneric("sumRDD", function(x) { standardGeneric("sumRDD") })

-#' @rdname name
-#' @export
+# @rdname name
+# @export
 setGeneric("name", function(x) { standardGeneric("name") })

-#' @rdname numPartitions
-#' @export
+# @rdname numPartitions
+# @export
 setGeneric("numPartitions", function(x) { standardGeneric("numPartitions") })

-#' @rdname persist
-#' @export
+# @rdname persist
+# @export
 setGeneric("persist", function(x, newLevel) { standardGeneric("persist") })

-#' @rdname pipeRDD
-#' @export
+# @rdname
[2/2] spark git commit: [SPARK-6824] Fill the docs for DataFrame API in SparkR
[SPARK-6824] Fill the docs for DataFrame API in SparkR

This patch also removes the RDD docs from being built as a part of roxygen, simply by deleting the ' of the "#'" comment markers.

Author: hqzizania <qian.hu...@intel.com>
Author: qhuang <qian.hu...@intel.com>

Closes #5969 from hqzizania/R1 and squashes the following commits:

6d27696 [qhuang] fixes in NAMESPACE
eb4b095 [qhuang] remove more docs
6394579 [qhuang] remove RDD docs in generics.R
6813860 [hqzizania] Fill the docs for DataFrame API in SparkR
857220f [hqzizania] remove the pairRDD docs from being built as a part of roxygen
c045d64 [hqzizania] remove the RDD docs from being built as a part of roxygen

(cherry picked from commit 008a60dd371e76819d8e08ab638cac7b3a48c9fc)
Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f01f5b5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f01f5b5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f01f5b5

Branch: refs/heads/branch-1.4
Commit: 4f01f5b563819e2ce7d3ac7ea86162b4e76935a3
Parents: 75fed0c
Author: hqzizania <qian.hu...@intel.com>
Authored: Fri May 8 11:25:04 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Fri May 8 11:25:20 2015 -0700

----------------------------------------------------------------------
 R/pkg/DESCRIPTION    |    2 +-
 R/pkg/NAMESPACE      |    4 -
 R/pkg/R/DataFrame.R  |   95 +-
 R/pkg/R/RDD.R        | 1546 ++---
 R/pkg/R/SQLContext.R |   64 +-
 R/pkg/R/broadcast.R  |   64 +-
 R/pkg/R/context.R    |  240 +++
 R/pkg/R/generics.R   |  318 +-
 R/pkg/R/pairRDD.R    |  886 +-
 9 files changed, 1610 insertions(+), 1609 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4f01f5b5/R/pkg/DESCRIPTION
----------------------------------------------------------------------
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 1c1779a..efc85bb 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -15,11 +15,11 @@ Suggests:
 Description: R frontend for Spark
 License: Apache License (== 2.0)
 Collate:
+    'schema.R'
     'generics.R'
     'jobj.R'
     'RDD.R'
     'pairRDD.R'
-    'schema.R'
     'column.R'
     'group.R'
     'DataFrame.R'

http://git-wip-us.apache.org/repos/asf/spark/blob/4f01f5b5/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 3fb92be..7611f47 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -26,7 +26,6 @@ exportMethods("cache",
               "intersect",
               "isLocal",
               "join",
-              "length",
               "limit",
               "orderBy",
               "names",
@@ -101,9 +100,6 @@ export("cacheTable",
        "tables",
        "uncacheTable")

-export("sparkRSQL.init",
-       "sparkRHive.init")
-
 export("structField",
        "structField.jobj",
        "structField.character",

http://git-wip-us.apache.org/repos/asf/spark/blob/4f01f5b5/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 47d92f1..354642e 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -45,6 +45,9 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {

 #' @rdname DataFrame
 #' @export
+#'
+#' @param sdf A Java object reference to the backing Scala DataFrame
+#' @param isCached TRUE if the dataFrame is cached
 dataFrame <- function(sdf, isCached = FALSE) {
   new("DataFrame", sdf, isCached)
 }
@@ -244,7 +247,7 @@ setMethod("columns",
           })

 #' @rdname columns
-#' @export
+#' @aliases names,DataFrame,function-method
 setMethod("names",
           signature(x = "DataFrame"),
           function(x) {
@@ -399,23 +402,23 @@ setMethod("repartition",
             dataFrame(sdf)
           })

-#' toJSON
-#'
-#' Convert the rows of a DataFrame into JSON objects and return an RDD where
-#' each element contains a JSON string.
-#'
-#' @param x A SparkSQL DataFrame
-#' @return A StringRRDD of JSON objects
-#' @rdname tojson
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlCtx, path)
-#' newRDD <- toJSON(df)
-#'}
+# toJSON
+#
+# Convert the rows of a DataFrame into JSON objects and return an RDD where
+# each element contains a JSON string.
+#
+# @param x A SparkSQL DataFrame
+# @return A StringRRDD of JSON objects
+# @rdname tojson
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# path <- "path/to/file.json"
+# df <- jsonFile(sqlCtx, path)
+# newRDD <- toJSON(df)
+#}
 setMethod("toJSON",
           signature(x = "DataFrame"),
           function(x) {
@@ -578,8 +581,8 @@ setMethod("limit",
             dataFrame(res
spark git commit: updated ec2 instance types
Repository: spark Updated Branches: refs/heads/master 35c9599b9 - 1c78f6866 updated ec2 instance types I needed to run some d2 instances, so I updated the spark_ec2.py accordingly Author: Brendan Collins bcoll...@blueraster.com Closes #6014 from brendancol/ec2-instance-types-update and squashes the following commits: d7b4191 [Brendan Collins] Merge branch 'ec2-instance-types-update' of github.com:brendancol/spark into ec2-instance-types-update 6366c45 [Brendan Collins] added back cc1.4xlarge fc2931f [Brendan Collins] updated ec2 instance types 80c2aa6 [Brendan Collins] vertically aligned whitespace 85c6236 [Brendan Collins] vertically aligned whitespace 1657c26 [Brendan Collins] updated ec2 instance types Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1c78f686 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1c78f686 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1c78f686 Branch: refs/heads/master Commit: 1c78f6866ebbcfb41d9875bfa3c0b9fa23b188bf Parents: 35c9599 Author: Brendan Collins bcoll...@blueraster.com Authored: Fri May 8 15:59:34 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri May 8 15:59:34 2015 -0700 -- ec2/spark_ec2.py | 70 ++- 1 file changed, 47 insertions(+), 23 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1c78f686/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 87c0818..ab4a96f 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -347,46 +347,57 @@ def get_validate_spark_version(version, repo): # Source: http://aws.amazon.com/amazon-linux-ami/instance-type-matrix/ -# Last Updated: 2014-06-20 +# Last Updated: 2015-05-08 # For easy maintainability, please keep this manually-inputted dictionary sorted by key. EC2_INSTANCE_TYPES = { c1.medium: pvm, c1.xlarge: pvm, +c3.large:pvm, +c3.xlarge: pvm, c3.2xlarge: pvm, c3.4xlarge: pvm, c3.8xlarge: pvm, -c3.large:pvm, -c3.xlarge: pvm, +c4.large:hvm, +c4.xlarge: hvm, +c4.2xlarge: hvm, +c4.4xlarge: hvm, +c4.8xlarge: hvm, cc1.4xlarge: hvm, cc2.8xlarge: hvm, cg1.4xlarge: hvm, cr1.8xlarge: hvm, +d2.xlarge: hvm, +d2.2xlarge: hvm, +d2.4xlarge: hvm, +d2.8xlarge: hvm, +g2.2xlarge: hvm, +g2.8xlarge: hvm, hi1.4xlarge: pvm, hs1.8xlarge: pvm, +i2.xlarge: hvm, i2.2xlarge: hvm, i2.4xlarge: hvm, i2.8xlarge: hvm, -i2.xlarge: hvm, -m1.large:pvm, -m1.medium: pvm, m1.small:pvm, +m1.medium: pvm, +m1.large:pvm, m1.xlarge: pvm, +m2.xlarge: pvm, m2.2xlarge: pvm, m2.4xlarge: pvm, -m2.xlarge: pvm, -m3.2xlarge: hvm, -m3.large:hvm, m3.medium: hvm, +m3.large:hvm, m3.xlarge: hvm, +m3.2xlarge: hvm, +r3.large:hvm, +r3.xlarge: hvm, r3.2xlarge: hvm, r3.4xlarge: hvm, r3.8xlarge: hvm, -r3.large:hvm, -r3.xlarge: hvm, t1.micro:pvm, -t2.medium: hvm, t2.micro:hvm, t2.small:hvm, +t2.medium: hvm, } @@ -878,44 +889,57 @@ def wait_for_cluster_state(conn, opts, cluster_instances, cluster_state): # Get number of local disks available for a given EC2 instance type. def get_num_disks(instance_type): # Source: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html -# Last Updated: 2014-06-20 +# Last Updated: 2015-05-08 # For easy maintainability, please keep this manually-inputted dictionary sorted by key. 
disks_by_instance = { c1.medium: 1, c1.xlarge: 4, +c3.large:2, +c3.xlarge: 2, c3.2xlarge: 2, c3.4xlarge: 2, c3.8xlarge: 2, -c3.large:2, -c3.xlarge: 2, +c4.large:0, +c4.xlarge: 0, +c4.2xlarge: 0, +c4.4xlarge: 0, +c4.8xlarge: 0, cc1.4xlarge: 2, cc2.8xlarge: 4, cg1.4xlarge: 2, cr1.8xlarge: 2, +d2.xlarge: 3, +d2.2xlarge: 6, +d2.4xlarge: 12, +d2.8xlarge: 24, g2.2xlarge: 1, +g2.8xlarge: 2, hi1.4xlarge: 2, hs1.8xlarge: 24, +i2.xlarge: 1, i2.2xlarge: 2, i2.4xlarge: 4, i2.8xlarge: 8, -i2.xlarge: 1, -m1.large:2, -m1.medium: 1, m1.small:1, +m1.medium: 1, +m1.large:2, m1.xlarge: 4, +m2.xlarge: 1, m2.2xlarge: 1, m2.4xlarge: 2, -m2.xlarge: 1
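Both tables the patch touches are plain lookup maps keyed by instance type. An illustrative R sketch of the lookup-with-fallback pattern such a table supports (the excerpted table and the fallback behavior below are assumptions for illustration, not the script's verbatim logic):

    # A small excerpt of the instance-type -> local-disk-count table.
    disks_by_instance <- c("d2.xlarge" = 3, "d2.2xlarge" = 6,
                           "d2.4xlarge" = 12, "d2.8xlarge" = 24)
    get_num_disks <- function(instance_type) {
      if (instance_type %in% names(disks_by_instance)) {
        disks_by_instance[[instance_type]]
      } else {
        # Unknown types fall back to a conservative default.
        warning(paste("Don't know number of disks on instance type",
                      instance_type, "- assuming 1"))
        1
      }
    }
    get_num_disks("d2.4xlarge")   # 12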
spark git commit: [SPARK-7226] [SPARKR] Support math functions in R DataFrame
Repository: spark Updated Branches: refs/heads/master 9b6cf285d - 50da9e891 [SPARK-7226] [SPARKR] Support math functions in R DataFrame Author: qhuang qian.hu...@intel.com Closes #6170 from hqzizania/master and squashes the following commits: f20c39f [qhuang] add tests units and fixes 2a7d121 [qhuang] use a function name more familiar to R users 07aa72e [qhuang] Support math functions in R DataFrame Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50da9e89 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50da9e89 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50da9e89 Branch: refs/heads/master Commit: 50da9e89161faa0ecdc1feb3ffee6c822a742034 Parents: 9b6cf28 Author: qhuang qian.hu...@intel.com Authored: Fri May 15 14:06:16 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri May 15 14:06:16 2015 -0700 -- R/pkg/NAMESPACE | 23 ++ R/pkg/R/column.R | 36 --- R/pkg/R/generics.R | 20 +++ R/pkg/inst/tests/test_sparkSQL.R | 24 +++ 4 files changed, 100 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/50da9e89/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ba29614..64ffdcf 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -59,33 +59,56 @@ exportMethods(arrange, exportClasses(Column) exportMethods(abs, + acos, alias, approxCountDistinct, asc, + asin, + atan, + atan2, avg, cast, + cbrt, + ceiling, contains, + cos, + cosh, countDistinct, desc, endsWith, + exp, + expm1, + floor, getField, getItem, + hypot, isNotNull, isNull, last, like, + log, + log10, + log1p, lower, max, mean, min, n, n_distinct, + rint, rlike, + sign, + sin, + sinh, sqrt, startsWith, substr, sum, sumDistinct, + tan, + tanh, + toDegrees, + toRadians, upper) exportClasses(GroupedData) http://git-wip-us.apache.org/repos/asf/spark/blob/50da9e89/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 9a68445..80e92d3 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -55,12 +55,17 @@ operators - list( + = plus, - = minus, * = multiply, / = divide, %% = mod, == = equalTo, = gt, = lt, != = notEqual, = = leq, = = geq, # we can not override `` and `||`, so use `` and `|` instead - = and, | = or #, ! = unary_$bang + = and, | = or, #, ! = unary_$bang + ^ = pow ) column_functions1 - c(asc, desc, isNull, isNotNull) column_functions2 - c(like, rlike, startsWith, endsWith, getField, getItem, contains) functions - c(min, max, sum, avg, mean, count, abs, sqrt, - first, last, lower, upper, sumDistinct) + first, last, lower, upper, sumDistinct, + acos, asin, atan, cbrt, ceiling, cos, cosh, exp, + expm1, floor, log, log10, log1p, rint, sign, + sin, sinh, tan, tanh, toDegrees, toRadians) +binary_mathfunctions- c(atan2, hypot) createOperator - function(op) { setMethod(op, @@ -76,7 +81,11 @@ createOperator - function(op) { if (class(e2) == Column) { e2 - e2@jc } -callJMethod(e1@jc, operators[[op]], e2) +if (op == ^) { + jc - callJStatic(org.apache.spark.sql.functions, operators[[op]], e1@jc, e2) +} else { + callJMethod(e1@jc, operators[[op]], e2) +} } column(jc) }) @@ -106,11 +115,29 @@ createStaticFunction - function(name) { setMethod(name, signature(x = Column), function(x) { + if (name == ceiling) { + name - ceil + } + if (name == sign) { + name - signum + } jc - callJStatic(org.apache.spark.sql.functions, name, x@jc
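The patch wires Spark SQL's math functions into SparkR's Column class: unary functions go through callJStatic (with ceiling and sign renamed to Spark's ceil and signum), ^ is translated to pow, and atan2/hypot arrive as binary functions. A short usage sketch, assuming a SparkR session and a DataFrame df with numeric columns x and y:

    df2 <- select(df, sin(df$x), toDegrees(df$x), atan2(df$y, df$x))
    df3 <- withColumn(df, "xCubed", df$x ^ 3)   # ^ maps to Spark's pow()
    head(collect(df2))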
spark git commit: [SPARK-7226] [SPARKR] Support math functions in R DataFrame
Repository: spark Updated Branches: refs/heads/branch-1.4 a5f7b3b9c - 9ef6d743a [SPARK-7226] [SPARKR] Support math functions in R DataFrame Author: qhuang qian.hu...@intel.com Closes #6170 from hqzizania/master and squashes the following commits: f20c39f [qhuang] add tests units and fixes 2a7d121 [qhuang] use a function name more familiar to R users 07aa72e [qhuang] Support math functions in R DataFrame (cherry picked from commit 50da9e89161faa0ecdc1feb3ffee6c822a742034) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9ef6d743 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9ef6d743 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9ef6d743 Branch: refs/heads/branch-1.4 Commit: 9ef6d743a65cb3f962e4f2e0716f55dbe7efb084 Parents: a5f7b3b Author: qhuang qian.hu...@intel.com Authored: Fri May 15 14:06:16 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri May 15 14:06:39 2015 -0700 -- R/pkg/NAMESPACE | 23 ++ R/pkg/R/column.R | 36 --- R/pkg/R/generics.R | 20 +++ R/pkg/inst/tests/test_sparkSQL.R | 24 +++ 4 files changed, 100 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9ef6d743/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ba29614..64ffdcf 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -59,33 +59,56 @@ exportMethods(arrange, exportClasses(Column) exportMethods(abs, + acos, alias, approxCountDistinct, asc, + asin, + atan, + atan2, avg, cast, + cbrt, + ceiling, contains, + cos, + cosh, countDistinct, desc, endsWith, + exp, + expm1, + floor, getField, getItem, + hypot, isNotNull, isNull, last, like, + log, + log10, + log1p, lower, max, mean, min, n, n_distinct, + rint, rlike, + sign, + sin, + sinh, sqrt, startsWith, substr, sum, sumDistinct, + tan, + tanh, + toDegrees, + toRadians, upper) exportClasses(GroupedData) http://git-wip-us.apache.org/repos/asf/spark/blob/9ef6d743/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 9a68445..80e92d3 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -55,12 +55,17 @@ operators - list( + = plus, - = minus, * = multiply, / = divide, %% = mod, == = equalTo, = gt, = lt, != = notEqual, = = leq, = = geq, # we can not override `` and `||`, so use `` and `|` instead - = and, | = or #, ! = unary_$bang + = and, | = or, #, ! = unary_$bang + ^ = pow ) column_functions1 - c(asc, desc, isNull, isNotNull) column_functions2 - c(like, rlike, startsWith, endsWith, getField, getItem, contains) functions - c(min, max, sum, avg, mean, count, abs, sqrt, - first, last, lower, upper, sumDistinct) + first, last, lower, upper, sumDistinct, + acos, asin, atan, cbrt, ceiling, cos, cosh, exp, + expm1, floor, log, log10, log1p, rint, sign, + sin, sinh, tan, tanh, toDegrees, toRadians) +binary_mathfunctions- c(atan2, hypot) createOperator - function(op) { setMethod(op, @@ -76,7 +81,11 @@ createOperator - function(op) { if (class(e2) == Column) { e2 - e2@jc } -callJMethod(e1@jc, operators[[op]], e2) +if (op == ^) { + jc - callJStatic(org.apache.spark.sql.functions, operators[[op]], e1@jc, e2) +} else { + callJMethod(e1@jc, operators[[op]], e2) +} } column(jc) }) @@ -106,11 +115,29 @@ createStaticFunction - function(name) { setMethod(name, signature(x = Column), function(x) { + if (name == ceiling) { + name - ceil + } + if (name
spark git commit: [SPARK-6855] [SPARKR] Set R includes to get the right collate order.
Repository: spark Updated Branches: refs/heads/master ef3fb801a - 55f553a97 [SPARK-6855] [SPARKR] Set R includes to get the right collate order. This prevents tools like devtools::document creating invalid collate orders Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #5462 from shivaram/collate-order and squashes the following commits: f3db562 [Shivaram Venkataraman] Set R includes to get the right collate order. This prevents tools like devtools::document creating invalid collate orders Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/55f553a9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/55f553a9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/55f553a9 Branch: refs/heads/master Commit: 55f553a979db925aa0c3559f7e80b99d2bf3feb4 Parents: ef3fb80 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Thu Apr 16 13:06:34 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Thu Apr 16 13:06:34 2015 -0700 -- R/pkg/DESCRIPTION | 6 +++--- R/pkg/R/DataFrame.R | 2 +- R/pkg/R/column.R| 2 +- R/pkg/R/group.R | 3 +++ R/pkg/R/jobj.R | 3 +++ R/pkg/R/pairRDD.R | 2 ++ 6 files changed, 13 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 1842b97..052f68c 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -17,19 +17,19 @@ License: Apache License (== 2.0) Collate: 'generics.R' 'jobj.R' -'SQLTypes.R' 'RDD.R' 'pairRDD.R' +'SQLTypes.R' 'column.R' 'group.R' 'DataFrame.R' 'SQLContext.R' +'backend.R' 'broadcast.R' +'client.R' 'context.R' 'deserialize.R' 'serialize.R' 'sparkR.R' -'backend.R' -'client.R' 'utils.R' 'zzz.R' http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index feafd56..044fdb4 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -17,7 +17,7 @@ # DataFrame.R - DataFrame class and methods implemented in S4 OO classes -#' @include jobj.R SQLTypes.R RDD.R pairRDD.R column.R group.R +#' @include generics.R jobj.R SQLTypes.R RDD.R pairRDD.R column.R group.R NULL setOldClass(jobj) http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index e196305..b282001 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -17,7 +17,7 @@ # Column Class -#' @include generics.R jobj.R +#' @include generics.R jobj.R SQLTypes.R NULL setOldClass(jobj) http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/R/group.R -- diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index 09fc0a7..855fbdf 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -17,6 +17,9 @@ # group.R - GroupedData class and methods implemented in S4 OO classes +#' @include generics.R jobj.R SQLTypes.R column.R +NULL + setOldClass(jobj) #' @title S4 class that represents a GroupedData http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/R/jobj.R -- diff --git a/R/pkg/R/jobj.R b/R/pkg/R/jobj.R index 4180f14..a8a2523 100644 --- a/R/pkg/R/jobj.R +++ b/R/pkg/R/jobj.R @@ -18,6 +18,9 @@ # References to objects that exist on the JVM backend # are maintained using the jobj. 
+#' @include generics.R +NULL + # Maintain a reference count of Java object references # This allows us to GC the java object when it is safe .validJobjs - new.env(parent = emptyenv()) http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/R/pairRDD.R -- diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R index 739d399..5d64822 100644 --- a/R/pkg/R/pairRDD.R +++ b/R/pkg/R/pairRDD.R @@ -16,6 +16,8 @@ # # Operations supported on RDDs contains pairs (i.e key, value) +#' @include generics.R jobj.R RDD.R +NULL Actions and Transformations - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
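The fix works because roxygen builds the DESCRIPTION Collate field from @include directives, so declaring each file's dependencies explicitly keeps devtools::document from emitting an invalid order. The pattern, sketched for a hypothetical file R/bar.R that needs classes from R/foo.R:

    #' @include foo.R
    NULL

    # Definitions below may rely on classes and generics from foo.R,
    # since foo.R is now guaranteed to be collated (and sourced) first.
    setClass("Bar", contains = "Foo")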
spark git commit: [SPARK-6807] [SparkR] Merge recent SparkR-pkg changes
Repository: spark Updated Branches: refs/heads/master a83571acc - 59e206deb [SPARK-6807] [SparkR] Merge recent SparkR-pkg changes This PR pulls in recent changes in SparkR-pkg, including cartesian, intersection, sampleByKey, subtract, subtractByKey, except, and some API for StructType and StructField. Author: cafreeman cfree...@alteryx.com Author: Davies Liu dav...@databricks.com Author: Zongheng Yang zonghen...@gmail.com Author: Shivaram Venkataraman shivaram.venkatara...@gmail.com Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Author: Sun Rui rui@intel.com Closes #5436 from davies/R3 and squashes the following commits: c2b09be [Davies Liu] SQLTypes - schema a5a02f2 [Davies Liu] Merge branch 'master' of github.com:apache/spark into R3 168b7fe [Davies Liu] sort generics b1fe460 [Davies Liu] fix conflict in README.md e74c04e [Davies Liu] fix schema.R 4f5ac09 [Davies Liu] Merge branch 'master' of github.com:apache/spark into R5 41f8184 [Davies Liu] rm man ae78312 [Davies Liu] Merge pull request #237 from sun-rui/SPARKR-154_3 1bdcb63 [Zongheng Yang] Updates to README.md. 5a553e7 [cafreeman] Use object attribute instead of argument 71372d9 [cafreeman] Update docs and examples 8526d2e71 [cafreeman] Remove `tojson` functions 6ef5f2d [cafreeman] Fix spacing 7741d66 [cafreeman] Rename the SQL DataType function 141efd8 [Shivaram Venkataraman] Merge pull request #245 from hqzizania/upstream 9387402 [Davies Liu] fix style 40199eb [Shivaram Venkataraman] Move except into sorted position 07d0dbc [Sun Rui] [SPARKR-244] Fix test failure after integration of subtract() and subtractByKey() for RDD. 7e8caa3 [Shivaram Venkataraman] Merge pull request #246 from hlin09/fixCombineByKey ed66c81 [cafreeman] Update `subtract` to work with `generics.R` f3ba785 [cafreeman] Fixed duplicate export 275deb4 [cafreeman] Update `NAMESPACE` and tests 1a3b63d [cafreeman] new version of `CreateDF` 836c4bf [cafreeman] Update `createDataFrame` and `toDF` be5d5c1 [cafreeman] refactor schema functions 40338a4 [Zongheng Yang] Merge pull request #244 from sun-rui/SPARKR-154_5 20b97a6 [Zongheng Yang] Merge pull request #234 from hqzizania/assist ba54e34 [Shivaram Venkataraman] Merge pull request #238 from sun-rui/SPARKR-154_4 c9497a3 [Shivaram Venkataraman] Merge pull request #208 from lythesia/master b317aa7 [Zongheng Yang] Merge pull request #243 from hqzizania/master 136a07e [Zongheng Yang] Merge pull request #242 from hqzizania/stats cd66603 [cafreeman] new line at EOF 8b76e81 [Shivaram Venkataraman] Merge pull request #233 from redbaron/fail-early-on-missing-dep 7dd81b7 [cafreeman] Documentation 0e2a94f [cafreeman] Define functions for schema and fields Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/59e206de Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/59e206de Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/59e206de Branch: refs/heads/master Commit: 59e206deb7346148412bbf5ba4ab626718fadf18 Parents: a83571a Author: cafreeman cfree...@alteryx.com Authored: Fri Apr 17 13:42:19 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Apr 17 13:42:19 2015 -0700 -- R/pkg/DESCRIPTION | 2 +- R/pkg/NAMESPACE | 20 +- R/pkg/R/DataFrame.R | 18 +- R/pkg/R/RDD.R | 205 --- R/pkg/R/SQLContext.R| 44 +--- R/pkg/R/SQLTypes.R | 64 -- R/pkg/R/column.R| 2 +- R/pkg/R/generics.R | 46 - R/pkg/R/group.R | 2 +- R/pkg/R/pairRDD.R | 192 + R/pkg/R/schema.R| 162 +++ R/pkg/R/serialize.R | 9 +- R/pkg/R/utils.R | 80 
R/pkg/inst/tests/test_rdd.R | 193 ++--- R/pkg/inst/tests/test_shuffle.R | 12 ++ R/pkg/inst/tests/test_sparkSQL.R| 35 ++-- R/pkg/inst/worker/worker.R | 59 +- .../scala/org/apache/spark/api/r/RRDD.scala | 131 ++-- .../scala/org/apache/spark/api/r/SerDe.scala| 14 +- .../org/apache/spark/sql/api/r/SQLUtils.scala | 32 ++- 20 files changed, 971 insertions(+), 351 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/59e206de/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 052f68c..1c1779a 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -19,7 +19,7 @@ Collate: 'jobj.R
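Among the merged pieces are the RDD set operations and the schema builders that replace the old tojson helpers. A brief sketch of both, assuming a SparkR session with contexts sc and sqlCtx, and a hypothetical RDD of rows rowRDD:

    rdd1 <- parallelize(sc, list(1, 2, 3, 4))
    rdd2 <- parallelize(sc, list(3, 4, 5))
    collect(intersection(rdd1, rdd2))   # list(3, 4), order not guaranteed
    collect(subtract(rdd1, rdd2))       # list(1, 2), order not guaranteed

    # The new StructType/StructField API:
    s <- structType(structField("name", "string"), structField("age", "integer"))
    df <- createDataFrame(sqlCtx, rowRDD, s)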
spark git commit: [SPARK-6850] [SparkR] use one partition when we need to compare the whole result
Repository: spark
Updated Branches:
  refs/heads/master 4740d6a15 -> 68ecdb7f9

[SPARK-6850] [SparkR] use one partition when we need to compare the whole result

Author: Davies Liu dav...@databricks.com

Closes #5460 from davies/r_test and squashes the following commits:

0a593ce [Davies Liu] use one partition when we need to compare the whole result

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/68ecdb7f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/68ecdb7f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/68ecdb7f

Branch: refs/heads/master
Commit: 68ecdb7f99ae30f7c04c33a47ab7f59a3836f2a4
Parents: 4740d6a
Author: Davies Liu dav...@databricks.com
Authored: Fri Apr 10 15:35:45 2015 -0700
Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu
Committed: Fri Apr 10 15:35:45 2015 -0700

--
 R/pkg/inst/tests/test_binaryFile.R | 4 ++--
 R/pkg/inst/tests/test_textFile.R   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/68ecdb7f/R/pkg/inst/tests/test_binaryFile.R
--
diff --git a/R/pkg/inst/tests/test_binaryFile.R b/R/pkg/inst/tests/test_binaryFile.R
index 4bb5f58..ca4218f 100644
--- a/R/pkg/inst/tests/test_binaryFile.R
+++ b/R/pkg/inst/tests/test_binaryFile.R
@@ -27,7 +27,7 @@ test_that("saveAsObjectFile()/objectFile() following textFile() works", {
   fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   writeLines(mockFile, fileName1)

-  rdd <- textFile(sc, fileName1)
+  rdd <- textFile(sc, fileName1, 1)
   saveAsObjectFile(rdd, fileName2)
   rdd <- objectFile(sc, fileName2)
   expect_equal(collect(rdd), as.list(mockFile))
@@ -40,7 +40,7 @@ test_that("saveAsObjectFile()/objectFile() works on a parallelized list", {
   fileName <- tempfile(pattern = "spark-test", fileext = ".tmp")

   l <- list(1, 2, 3)
-  rdd <- parallelize(sc, l)
+  rdd <- parallelize(sc, l, 1)
   saveAsObjectFile(rdd, fileName)
   rdd <- objectFile(sc, fileName)
   expect_equal(collect(rdd), l)

http://git-wip-us.apache.org/repos/asf/spark/blob/68ecdb7f/R/pkg/inst/tests/test_textFile.R
--
diff --git a/R/pkg/inst/tests/test_textFile.R b/R/pkg/inst/tests/test_textFile.R
index 7bb3e80..6b87b4b 100644
--- a/R/pkg/inst/tests/test_textFile.R
+++ b/R/pkg/inst/tests/test_textFile.R
@@ -81,7 +81,7 @@ test_that("textFile() followed by a saveAsTextFile() returns the same content",
   fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   writeLines(mockFile, fileName1)

-  rdd <- textFile(sc, fileName1)
+  rdd <- textFile(sc, fileName1, 1L)
   saveAsTextFile(rdd, fileName2)
   rdd <- textFile(sc, fileName2)
   expect_equal(collect(rdd), as.list(mockFile))
@@ -93,7 +93,7 @@ test_that("saveAsTextFile() on a parallelized list works as expected", {
   fileName <- tempfile(pattern = "spark-test", fileext = ".tmp")
   l <- list(1, 2, 3)
-  rdd <- parallelize(sc, l)
+  rdd <- parallelize(sc, l, 1L)
   saveAsTextFile(rdd, fileName)
   rdd <- textFile(sc, fileName)
   expect_equal(collect(rdd), lapply(l, function(x) {toString(x)}))

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
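The change matters because collect() on a multi-partition RDD that has been written out and read back need not preserve the original element order; pinning a single partition makes the round trip deterministic, so the tests can compare against the whole input. A sketch, assuming a SparkR context sc:

    rdd <- parallelize(sc, list(1, 2, 3), 1L)         # one partition
    fileName <- tempfile(pattern = "spark-test", fileext = ".tmp")
    saveAsTextFile(rdd, fileName)
    collect(textFile(sc, fileName))                   # list("1", "2", "3"), in order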
[5/7] spark git commit: [SPARK-5654] Integrate SparkR
(filterRDD(rdd, function (x) { x 3 }))) # c(1, 2) +#'} +#' @rdname filterRDD +#' @aliases filterRDD,RDD,function-method +setMethod(filterRDD, + signature(x = RDD, f = function), + function(x, f) { +filter.func - function(part) { + Filter(f, part) +} +lapplyPartition(x, filter.func) + }) + +#' @rdname filterRDD +#' @aliases Filter +setMethod(Filter, + signature(f = function, x = RDD), + function(f, x) { +filterRDD(x, f) + }) + +#' Reduce across elements of an RDD. +#' +#' This function reduces the elements of this RDD using the +#' specified commutative and associative binary operator. +#' +#' @param x The RDD to reduce +#' @param func Commutative and associative function to apply on elements +#' of the RDD. +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' rdd - parallelize(sc, 1:10) +#' reduce(rdd, +) # 55 +#'} +#' @rdname reduce +#' @aliases reduce,RDD,ANY-method +setMethod(reduce, + signature(x = RDD, func = ANY), + function(x, func) { + +reducePartition - function(part) { + Reduce(func, part) +} + +partitionList - collect(lapplyPartition(x, reducePartition), + flatten = FALSE) +Reduce(func, partitionList) + }) + +#' Get the maximum element of an RDD. +#' +#' @param x The RDD to get the maximum element from +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' rdd - parallelize(sc, 1:10) +#' maximum(rdd) # 10 +#'} +#' @rdname maximum +#' @aliases maximum,RDD +setMethod(maximum, + signature(x = RDD), + function(x) { +reduce(x, max) + }) + +#' Get the minimum element of an RDD. +#' +#' @param x The RDD to get the minimum element from +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' rdd - parallelize(sc, 1:10) +#' minimum(rdd) # 1 +#'} +#' @rdname minimum +#' @aliases minimum,RDD +setMethod(minimum, + signature(x = RDD), + function(x) { +reduce(x, min) + }) + +#' Add up the elements in an RDD. +#' +#' @param x The RDD to add up the elements in +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' rdd - parallelize(sc, 1:10) +#' sumRDD(rdd) # 55 +#'} +#' @rdname sumRDD +#' @aliases sumRDD,RDD +setMethod(sumRDD, + signature(x = RDD), + function(x) { +reduce(x, +) + }) + +#' Applies a function to all elements in an RDD, and force evaluation. +#' +#' @param x The RDD to apply the function +#' @param func The function to be applied. +#' @return invisible NULL. +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' rdd - parallelize(sc, 1:10) +#' foreach(rdd, function(x) { save(x, file=...) }) +#'} +#' @rdname foreach +#' @aliases foreach,RDD,function-method +setMethod(foreach, + signature(x = RDD, func = function), + function(x, func) { +partition.func - function(x) { + lapply(x, func) + NULL +} +invisible(collect(mapPartitions(x, partition.func))) + }) + +#' Applies a function to each partition in an RDD, and force evaluation. +#' +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' rdd - parallelize(sc, 1:10) +#' foreachPartition(rdd, function(part) { save(part, file=...); NULL }) +#'} +#' @rdname foreach +#' @aliases foreachPartition,RDD,function-method +setMethod(foreachPartition, + signature(x = RDD, func = function), + function(x, func) { +invisible(collect(mapPartitions(x, func))) + }) + +#' Take elements from an RDD. +#' +#' This function takes the first NUM elements in the RDD and +#' returns them in a list. 
+#'
+#' @param x The RDD to take elements from
+#' @param num Number of elements to take
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' rdd <- parallelize(sc, 1:10)
+#' take(rdd, 2L) # list(1, 2)
+#'}
+#' @rdname take
+#' @aliases take,RDD,numeric-method
+setMethod("take",
+          signature(x = "RDD", num = "numeric"),
+          function(x, num) {
+            resList <- list()
+            index <- -1
+            jrdd <- getJRDD(x)
+            numPartitions <- numPartitions(x)
+
+            # TODO(shivaram): Collect more than one partition based on size
+            # estimates similar to the scala version of `take`.
+            while (TRUE) {
+              index <- index + 1
+
+              if (length(resList) >= num || index >= numPartitions)
+                break
+
+              # a JList of byte arrays
+              partitionArr <- callJMethod(jrdd, "collectPartitions", as.list(as.integer(index)))
+              partition <- partitionArr[[1]]
+
+              size <- num - length(resList)
+              # elems is capped to have at most `size` elements
+              elems <- convertJListToRList(partition,
+                                           flatten = TRUE
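As the implementation shows, take() pulls partitions over the JVM bridge one at a time and stops as soon as num elements are gathered, instead of collecting the whole RDD. A usage sketch, assuming a SparkR context sc:

    rdd <- parallelize(sc, 1:1000, 10L)
    take(rdd, 5L)   # list(1, 2, 3, 4, 5); the remaining partitions are never fetched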
[7/7] spark git commit: [SPARK-5654] Integrate SparkR
[SPARK-5654] Integrate SparkR This pull requests integrates SparkR, an R frontend for Spark. The SparkR package contains both RDD and DataFrame APIs in R and is integrated with Spark's submission scripts to work on different cluster managers. Some integration points that would be great to get feedback on: 1. Build procedure: SparkR requires R to be installed on the machine to be built. Right now we have a new Maven profile `-PsparkR` that can be used to enable SparkR builds 2. YARN cluster mode: The R package that is built needs to be present on the driver and all the worker nodes during execution. The R package location is currently set using SPARK_HOME, but this might not work on YARN cluster mode. The SparkR package represents the work of many contributors and attached below is a list of people along with areas they worked on edwardt (edwart) - Documentation improvements Felix Cheung (felixcheung) - Documentation improvements Hossein Falaki (falaki) - Documentation improvements Chris Freeman (cafreeman) - DataFrame API, Programming Guide Todd Gao (7c00) - R worker Internals Ryan Hafen (hafen) - SparkR Internals Qian Huang (hqzizania) - RDD API Hao Lin (hlin09) - RDD API, Closure cleaner Evert Lammerts (evertlammerts) - DataFrame API Davies Liu (davies) - DataFrame API, R worker internals, Merging with Spark Yi Lu (lythesia) - RDD API, Worker internals Matt Massie (massie) - Jenkins build Harihar Nahak (hnahak87) - SparkR examples Oscar Olmedo (oscaroboto) - Spark configuration Antonio Piccolboni (piccolbo) - SparkR examples, Namespace bug fixes Dan Putler (dputler) - Dataframe API, SparkR Install Guide Ashutosh Raina (ashutoshraina) - Build improvements Josh Rosen (joshrosen) - Travis CI build Sun Rui (sun-rui)- RDD API, JVM Backend, Shuffle improvements Shivaram Venkataraman (shivaram) - RDD API, JVM Backend, Worker Internals Zongheng Yang (concretevitamin) - RDD API, Pipelined RDDs, Examples and EC2 guide Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Author: Shivaram Venkataraman shivaram.venkatara...@gmail.com Author: Zongheng Yang zonghen...@gmail.com Author: cafreeman cfree...@alteryx.com Author: Shivaram Venkataraman shiva...@eecs.berkeley.edu Author: Davies Liu dav...@databricks.com Author: Davies Liu davies@gmail.com Author: hlin09 hlin0...@gmail.com Author: Sun Rui rui@intel.com Author: lythesia iranaik...@gmail.com Author: oscaroboto osca...@gmail.com Author: Antonio Piccolboni anto...@piccolboni.info Author: root edward Author: edwardt edwardt.t...@gmail.com Author: hqzizania qian.hu...@intel.com Author: dputler dan.put...@gmail.com Author: Todd Gao todd.gao.2...@gmail.com Author: Chris Freeman cfree...@alteryx.com Author: Felix Cheung fcheung@AVVOMAC-119.local Author: Hossein hoss...@databricks.com Author: Evert Lammerts ev...@apache.org Author: Felix Cheung fche...@avvomac-119.t-mobile.com Author: felixcheung felixcheun...@hotmail.com Author: Ryan Hafen rha...@gmail.com Author: Ashutosh Raina ashutoshra...@users.noreply.github.com Author: Oscar Olmedo osca...@gmail.com Author: Josh Rosen rosenvi...@gmail.com Author: Yi Lu iranaik...@gmail.com Author: Harihar Nahak hnaha...@users.noreply.github.com Closes #5096 from shivaram/R and squashes the following commits: da64742 [Davies Liu] fix Date serialization 59266d1 [Davies Liu] check exclusive of primary-py-file and primary-r-file 55808e4 [Davies Liu] fix tests 5581c75 [Davies Liu] update author of SparkR f731b48 [Shivaram Venkataraman] Only run SparkR tests if R is installed 64eda24 [Shivaram Venkataraman] Merge 
branch 'R' of https://github.com/amplab-extras/spark into R d7c3f22 [Shivaram Venkataraman] Address code review comments Changes include 1. Adding SparkR docs to API docs generated 2. Style fixes in SparkR scala files 3. Clean up of shell scripts and explanation of install-dev.sh 377151f [Shivaram Venkataraman] Merge remote-tracking branch 'apache/master' into R eb5da53 [Shivaram Venkataraman] Merge pull request #3 from davies/R2 a18ff5c [Davies Liu] Update sparkR.R 5133f3a [Shivaram Venkataraman] Merge pull request #7 from hqzizania/R3 940b631 [hqzizania] [SPARKR-92] Phase 2: implement sum(rdd) 0e788c0 [Shivaram Venkataraman] Merge pull request #5 from hlin09/doc-fix 3487461 [hlin09] Add tests log in .gitignore. 1d1802e [Shivaram Venkataraman] Merge pull request #4 from felixcheung/r-require 11981b7 [felixcheung] Update R to fail early if SparkR package is missing c300e08 [Davies Liu] remove duplicated file b045701 [Davies Liu] Merge branch 'remote_r' into R 19c9368 [Davies Liu] Merge branch 'sparkr-sql' of github.com:amplab-extras/SparkR-pkg into remote_r f8fa8af [Davies Liu] mute logging when start/stop context e7104b6 [Davies Liu] remove ::: in SparkR a1777eb [Davies Liu] move rules into R/.gitignore e88b649 [Davies Liu] Merge branch 'R' of github.com:amplab-extras/spark into R 6e20e71 [Davies Liu] address comments b433817 [Davies Liu] Merge branch 'master' of github.com:apache/spark into R
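For readers who want to try the merged package, a minimal end-to-end session, assuming Spark was built with the SparkR profile and SPARK_HOME points at the build:

    library(SparkR, lib.loc = file.path(Sys.getenv("SPARK_HOME"), "R", "lib"))
    sc <- sparkR.init(master = "local[2]", appName = "sparkr-demo")
    rdd <- parallelize(sc, 1:100, 4L)
    doubled <- map(rdd, function(x) { x * 2 })
    reduce(doubled, "+")   # 10100
    sparkR.stop()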
[6/7] spark git commit: [SPARK-5654] Integrate SparkR
http://git-wip-us.apache.org/repos/asf/spark/blob/2fe0a1aa/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE new file mode 100644 index 000..a354cdc --- /dev/null +++ b/R/pkg/NAMESPACE @@ -0,0 +1,182 @@ +#exportPattern(^[[:alpha:]]+) +exportClasses(RDD) +exportClasses(Broadcast) +exportMethods( + aggregateByKey, + aggregateRDD, + cache, + checkpoint, + coalesce, + cogroup, + collect, + collectAsMap, + collectPartition, + combineByKey, + count, + countByKey, + countByValue, + distinct, + Filter, + filterRDD, + first, + flatMap, + flatMapValues, + fold, + foldByKey, + foreach, + foreachPartition, + fullOuterJoin, + glom, + groupByKey, + join, + keyBy, + keys, + length, + lapply, + lapplyPartition, + lapplyPartitionsWithIndex, + leftOuterJoin, + lookup, + map, + mapPartitions, + mapPartitionsWithIndex, + mapValues, + maximum, + minimum, + numPartitions, + partitionBy, + persist, + pipeRDD, + reduce, + reduceByKey, + reduceByKeyLocally, + repartition, + rightOuterJoin, + sampleRDD, + saveAsTextFile, + saveAsObjectFile, + sortBy, + sortByKey, + sumRDD, + take, + takeOrdered, + takeSample, + top, + unionRDD, + unpersist, + value, + values, + zipRDD, + zipWithIndex, + zipWithUniqueId + ) + +# S3 methods exported +export( + textFile, + objectFile, + parallelize, + hashCode, + includePackage, + broadcast, + setBroadcastValue, + setCheckpointDir + ) +export(sparkR.init) +export(sparkR.stop) +export(print.jobj) +useDynLib(SparkR, stringHashCode) +importFrom(methods, setGeneric, setMethod, setOldClass) + +# SparkRSQL + +exportClasses(DataFrame) + +exportMethods(columns, + distinct, + dtypes, + explain, + filter, + groupBy, + head, + insertInto, + intersect, + isLocal, + limit, + orderBy, + names, + printSchema, + registerTempTable, + repartition, + sampleDF, + saveAsParquetFile, + saveAsTable, + saveDF, + schema, + select, + selectExpr, + show, + showDF, + sortDF, + subtract, + toJSON, + toRDD, + unionAll, + where, + withColumn, + withColumnRenamed) + +exportClasses(Column) + +exportMethods(abs, + alias, + approxCountDistinct, + asc, + avg, + cast, + contains, + countDistinct, + desc, + endsWith, + getField, + getItem, + isNotNull, + isNull, + last, + like, + lower, + max, + mean, + min, + rlike, + sqrt, + startsWith, + substr, + sum, + sumDistinct, + upper) + +exportClasses(GroupedData) +exportMethods(agg) + +export(sparkRSQL.init, + sparkRHive.init) + +export(cacheTable, + clearCache, + createDataFrame, + createExternalTable, + dropTempTable, + jsonFile, + jsonRDD, + loadDF, + parquetFile, + sql, + table, + tableNames, + tables, + toDF, + uncacheTable) + +export(print.structType, + print.structField) http://git-wip-us.apache.org/repos/asf/spark/blob/2fe0a1aa/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R new file mode 100644 index 000..feafd56 --- /dev/null +++ b/R/pkg/R/DataFrame.R @@ -0,0 +1,1270 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional
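The namespace splits the exports into three layers: RDD methods, DataFrame/Column methods, and SQLContext helpers. A short sketch of the DataFrame surface, assuming sqlContext and a hypothetical JSON file of people records:

    df <- jsonFile(sqlContext, "people.json")
    printSchema(df)
    adults <- filter(df, df$age > 21)
    head(select(adults, adults$name, adults$age))
    count(adults)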
[4/7] spark git commit: [SPARK-5654] Integrate SparkR
http://git-wip-us.apache.org/repos/asf/spark/blob/2fe0a1aa/R/pkg/R/context.R -- diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R new file mode 100644 index 000..2fc0bb2 --- /dev/null +++ b/R/pkg/R/context.R @@ -0,0 +1,225 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the License); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# context.R: SparkContext driven functions + +getMinSplits - function(sc, minSplits) { + if (is.null(minSplits)) { +defaultParallelism - callJMethod(sc, defaultParallelism) +minSplits - min(defaultParallelism, 2) + } + as.integer(minSplits) +} + +#' Create an RDD from a text file. +#' +#' This function reads a text file from HDFS, a local file system (available on all +#' nodes), or any Hadoop-supported file system URI, and creates an +#' RDD of strings from it. +#' +#' @param sc SparkContext to use +#' @param path Path of file to read. A vector of multiple paths is allowed. +#' @param minSplits Minimum number of splits to be created. If NULL, the default +#' value is chosen based on available parallelism. +#' @return RDD where each item is of type \code{character} +#' @export +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' lines - textFile(sc, myfile.txt) +#'} +textFile - function(sc, path, minSplits = NULL) { + # Allow the user to have a more flexible definiton of the text file path + path - suppressWarnings(normalizePath(path)) + #' Convert a string vector of paths to a string containing comma separated paths + path - paste(path, collapse = ,) + + jrdd - callJMethod(sc, textFile, path, getMinSplits(sc, minSplits)) + # jrdd is of type JavaRDD[String] + RDD(jrdd, string) +} + +#' Load an RDD saved as a SequenceFile containing serialized objects. +#' +#' The file to be loaded should be one that was previously generated by calling +#' saveAsObjectFile() of the RDD class. +#' +#' @param sc SparkContext to use +#' @param path Path of file to read. A vector of multiple paths is allowed. +#' @param minSplits Minimum number of splits to be created. If NULL, the default +#' value is chosen based on available parallelism. +#' @return RDD containing serialized R objects. +#' @seealso saveAsObjectFile +#' @export +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' rdd - objectFile(sc, myfile) +#'} +objectFile - function(sc, path, minSplits = NULL) { + # Allow the user to have a more flexible definiton of the text file path + path - suppressWarnings(normalizePath(path)) + #' Convert a string vector of paths to a string containing comma separated paths + path - paste(path, collapse = ,) + + jrdd - callJMethod(sc, objectFile, path, getMinSplits(sc, minSplits)) + # Assume the RDD contains serialized R objects. + RDD(jrdd, byte) +} + +#' Create an RDD from a homogeneous list or vector. +#' +#' This function creates an RDD from a local homogeneous list in R. 
The elements
+#' in the list are split into \code{numSlices} slices and distributed to nodes
+#' in the cluster.
+#'
+#' @param sc SparkContext to use
+#' @param coll collection to parallelize
+#' @param numSlices number of partitions to create in the RDD
+#' @return an RDD created from this collection
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' rdd <- parallelize(sc, 1:10, 2)
+#' # The RDD should contain 10 elements
+#' length(rdd)
+#'}
+parallelize <- function(sc, coll, numSlices = 1) {
+  # TODO: bound/safeguard numSlices
+  # TODO: unit tests for if the split works for all primitives
+  # TODO: support matrix, data frame, etc
+  if ((!is.list(coll) && !is.vector(coll)) || is.data.frame(coll)) {
+    if (is.data.frame(coll)) {
+      message(paste("context.R: A data frame is parallelized by columns."))
+    } else {
+      if (is.matrix(coll)) {
+        message(paste("context.R: A matrix is parallelized by elements."))
+      } else {
+        message(paste("context.R: parallelize() currently only supports lists and vectors.",
+                      "Calling as.list() to coerce coll into a list."))
+      }
+    }
+    coll <- as.list(coll)
+  }
+
+  if (numSlices > length(coll))
+    numSlices <- length(coll)
+
+  sliceLen <- ceiling(length(coll) / numSlices)
+  slices <- split(coll, rep(1:(numSlices + 1), each =
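As the code shows, parallelize() coerces matrices and data frames to lists (with a message) and caps numSlices at the collection length before splitting. A sketch, assuming a SparkR context sc:

    rdd <- parallelize(sc, 1:10, numSlices = 20)
    numPartitions(rdd)   # at most 10: numSlices is capped at length(coll)
    lines <- textFile(sc, "hdfs://nn:8020/data/file.txt")   # hypothetical URI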
[1/7] spark git commit: [SPARK-5654] Integrate SparkR
Repository: spark Updated Branches: refs/heads/master 1b2aab8d5 - 2fe0a1aae http://git-wip-us.apache.org/repos/asf/spark/blob/2fe0a1aa/core/src/main/scala/org/apache/spark/api/r/SerDe.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala new file mode 100644 index 000..ccb2a37 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala @@ -0,0 +1,340 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the License); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.api.r + +import java.io.{DataInputStream, DataOutputStream} +import java.sql.{Date, Time} + +import scala.collection.JavaConversions._ + +/** + * Utility functions to serialize, deserialize objects to / from R + */ +private[spark] object SerDe { + + // Type mapping from R to Java + // + // NULL - void + // integer - Int + // character - String + // logical - Boolean + // double, numeric - Double + // raw - Array[Byte] + // Date - Date + // POSIXlt/POSIXct - Time + // + // list[T] - Array[T], where T is one of above mentioned types + // environment - Map[String, T], where T is a native type + // jobj - Object, where jobj is an object created in the backend + + def readObjectType(dis: DataInputStream): Char = { +dis.readByte().toChar + } + + def readObject(dis: DataInputStream): Object = { +val dataType = readObjectType(dis) +readTypedObject(dis, dataType) + } + + def readTypedObject( + dis: DataInputStream, + dataType: Char): Object = { +dataType match { + case 'n' = null + case 'i' = new java.lang.Integer(readInt(dis)) + case 'd' = new java.lang.Double(readDouble(dis)) + case 'b' = new java.lang.Boolean(readBoolean(dis)) + case 'c' = readString(dis) + case 'e' = readMap(dis) + case 'r' = readBytes(dis) + case 'l' = readList(dis) + case 'D' = readDate(dis) + case 't' = readTime(dis) + case 'j' = JVMObjectTracker.getObject(readString(dis)) + case _ = throw new IllegalArgumentException(sInvalid type $dataType) +} + } + + def readBytes(in: DataInputStream): Array[Byte] = { +val len = readInt(in) +val out = new Array[Byte](len) +val bytesRead = in.readFully(out) +out + } + + def readInt(in: DataInputStream): Int = { +in.readInt() + } + + def readDouble(in: DataInputStream): Double = { +in.readDouble() + } + + def readString(in: DataInputStream): String = { +val len = in.readInt() +val asciiBytes = new Array[Byte](len) +in.readFully(asciiBytes) +assert(asciiBytes(len - 1) == 0) +val str = new String(asciiBytes.dropRight(1).map(_.toChar)) +str + } + + def readBoolean(in: DataInputStream): Boolean = { +val intVal = in.readInt() +if (intVal == 0) false else true + } + + def readDate(in: DataInputStream): Date = { +Date.valueOf(readString(in)) + } + + def readTime(in: DataInputStream): Time = { +val t = in.readDouble() +new Time((t * 
1000L).toLong)
+  }
+
+  def readBytesArr(in: DataInputStream): Array[Array[Byte]] = {
+    val len = readInt(in)
+    (0 until len).map(_ => readBytes(in)).toArray
+  }
+
+  def readIntArr(in: DataInputStream): Array[Int] = {
+    val len = readInt(in)
+    (0 until len).map(_ => readInt(in)).toArray
+  }
+
+  def readDoubleArr(in: DataInputStream): Array[Double] = {
+    val len = readInt(in)
+    (0 until len).map(_ => readDouble(in)).toArray
+  }
+
+  def readBooleanArr(in: DataInputStream): Array[Boolean] = {
+    val len = readInt(in)
+    (0 until len).map(_ => readBoolean(in)).toArray
+  }
+
+  def readStringArr(in: DataInputStream): Array[String] = {
+    val len = readInt(in)
+    (0 until len).map(_ => readString(in)).toArray
+  }
+
+  def readList(dis: DataInputStream): Array[_] = {
+    val arrType = readObjectType(dis)
+    arrType match {
+      case 'i' => readIntArr(dis)
+      case 'c' => readStringArr(dis)
+      case 'd' => readDoubleArr(dis)
+      case 'b' => readBooleanArr(dis)
+      case 'j' => readStringArr(dis).map(x => JVMObjectTracker.getObject(x))
+      case 'r' => readBytesArr(dis)
+      case _ => throw new
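On the wire, every value is a one-character type tag followed by a big-endian payload read via DataInputStream; strings additionally carry a trailing NUL byte that readString() asserts and strips. An illustrative R sketch of that framing (the package's real writer lives in R/pkg/R/serialize.R; this only makes the format concrete):

    con <- rawConnection(raw(0), "r+")
    writeChar("i", con, eos = NULL)               # tag: integer
    writeBin(42L, con, endian = "big")            # 4-byte big-endian payload
    writeChar("c", con, eos = NULL)               # tag: character
    s <- "hi"
    writeBin(nchar(s) + 1L, con, endian = "big")  # length includes the NUL
    writeBin(charToRaw(s), con)
    writeBin(as.raw(0), con)                      # trailing NUL terminator
    bytes <- rawConnectionValue(con)
    close(con)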
spark git commit: [SPARK-6246] [EC2] fixed support for more than 100 nodes
Repository: spark
Updated Branches:
  refs/heads/master bcb1ff814 -> 2bc5e0616

[SPARK-6246] [EC2] fixed support for more than 100 nodes

This is a small fix. But it is important for Amazon users because, as the ticket states, spark-ec2 currently can't handle clusters with more than 100 nodes.

Author: alyaxey oleksii.sliusare...@grammarly.com

Closes #6267 from alyaxey/ec2_100_nodes_fix and squashes the following commits:

1e0d747 [alyaxey] [SPARK-6246] fixed support for more than 100 nodes

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2bc5e061
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2bc5e061
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2bc5e061

Branch: refs/heads/master
Commit: 2bc5e0616d878b09daa8e31a7a1fdb7127bca079
Parents: bcb1ff8
Author: alyaxey oleksii.sliusare...@grammarly.com
Authored: Tue May 19 16:45:52 2015 -0700
Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu
Committed: Tue May 19 16:45:52 2015 -0700

--
 ec2/spark_ec2.py | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/2bc5e061/ec2/spark_ec2.py
--
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index be92d5f..c6d5a1f 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -864,7 +864,11 @@ def wait_for_cluster_state(conn, opts, cluster_instances, cluster_state):
         for i in cluster_instances:
             i.update()

-        statuses = conn.get_all_instance_status(instance_ids=[i.id for i in cluster_instances])
+        max_batch = 100
+        statuses = []
+        for j in xrange(0, len(cluster_instances), max_batch):
+            batch = [i.id for i in cluster_instances[j:j + max_batch]]
+            statuses.extend(conn.get_all_instance_status(instance_ids=batch))

         if cluster_state == 'ssh-ready':
             if all(i.state == 'running' for i in cluster_instances) and \

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
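The underlying limit is on the AWS side: the status API caps such queries at 100 instance IDs per request, so the fix chunks the request. The same batching pattern, sketched in R with a hypothetical describe_status() standing in for the capped API call:

    ids <- paste0("i-", sprintf("%05d", 1:250))
    max_batch <- 100
    statuses <- list()
    for (j in seq(1, length(ids), by = max_batch)) {
      batch <- ids[j:min(j + max_batch - 1, length(ids))]
      statuses <- c(statuses, describe_status(batch))   # hypothetical helper
    }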
spark git commit: [MINOR] Add 1.3, 1.3.1 to master branch EC2 scripts
Repository: spark Updated Branches: refs/heads/master ba4f8ca0d - 1a7b9ce80 [MINOR] Add 1.3, 1.3.1 to master branch EC2 scripts cc pwendell P.S: I can't believe this was outdated all along ? Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6215 from shivaram/update-ec2-map and squashes the following commits: ae3937a [Shivaram Venkataraman] Add 1.3, 1.3.1 to master branch EC2 scripts Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1a7b9ce8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1a7b9ce8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1a7b9ce8 Branch: refs/heads/master Commit: 1a7b9ce80bb5649796dda48d6a6d662a2809d0ef Parents: ba4f8ca Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Sun May 17 00:12:20 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sun May 17 00:12:20 2015 -0700 -- ec2/spark_ec2.py | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1a7b9ce8/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index ab4a96f..be92d5f 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -48,7 +48,7 @@ else: from urllib.request import urlopen, Request from urllib.error import HTTPError -SPARK_EC2_VERSION = 1.2.1 +SPARK_EC2_VERSION = 1.3.1 SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__)) VALID_SPARK_VERSIONS = set([ @@ -65,6 +65,8 @@ VALID_SPARK_VERSIONS = set([ 1.1.1, 1.2.0, 1.2.1, +1.3.0, +1.3.1, ]) SPARK_TACHYON_MAP = { @@ -75,6 +77,8 @@ SPARK_TACHYON_MAP = { 1.1.1: 0.5.0, 1.2.0: 0.5.0, 1.2.1: 0.5.0, +1.3.0: 0.5.0, +1.3.1: 0.5.0, } DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [MINOR] Add 1.3, 1.3.1 to master branch EC2 scripts
Repository: spark Updated Branches: refs/heads/branch-1.4 671a6bca5 - 0ed376afa [MINOR] Add 1.3, 1.3.1 to master branch EC2 scripts cc pwendell P.S: I can't believe this was outdated all along ? Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6215 from shivaram/update-ec2-map and squashes the following commits: ae3937a [Shivaram Venkataraman] Add 1.3, 1.3.1 to master branch EC2 scripts (cherry picked from commit 1a7b9ce80bb5649796dda48d6a6d662a2809d0ef) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0ed376af Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0ed376af Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0ed376af Branch: refs/heads/branch-1.4 Commit: 0ed376afad603b7afd86bb8eb312cad6edae2b9c Parents: 671a6bc Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Sun May 17 00:12:20 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sun May 17 00:12:46 2015 -0700 -- ec2/spark_ec2.py | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0ed376af/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index ab4a96f..be92d5f 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -48,7 +48,7 @@ else: from urllib.request import urlopen, Request from urllib.error import HTTPError -SPARK_EC2_VERSION = 1.2.1 +SPARK_EC2_VERSION = 1.3.1 SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__)) VALID_SPARK_VERSIONS = set([ @@ -65,6 +65,8 @@ VALID_SPARK_VERSIONS = set([ 1.1.1, 1.2.0, 1.2.1, +1.3.0, +1.3.1, ]) SPARK_TACHYON_MAP = { @@ -75,6 +77,8 @@ SPARK_TACHYON_MAP = { 1.1.1: 0.5.0, 1.2.0: 0.5.0, 1.2.1: 0.5.0, +1.3.0: 0.5.0, +1.3.1: 0.5.0, } DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8063] [SPARKR] Spark master URL conflict between MASTER env variable and --master command line option.
Repository: spark
Updated Branches:
  refs/heads/master d2a86eb8f -> 708c63bbb

[SPARK-8063] [SPARKR] Spark master URL conflict between MASTER env variable and --master command line option.

Author: Sun Rui rui@intel.com

Closes #6605 from sun-rui/SPARK-8063 and squashes the following commits:

51ca48b [Sun Rui] [SPARK-8063][SPARKR] Spark master URL conflict between MASTER env variable and --master command line option.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/708c63bb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/708c63bb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/708c63bb

Branch: refs/heads/master
Commit: 708c63bbbe9580eb774fe47e23ef61338103afda
Parents: d2a86eb
Author: Sun Rui rui@intel.com
Authored: Wed Jun 3 11:56:35 2015 -0700
Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu
Committed: Wed Jun 3 11:56:35 2015 -0700

--
 R/pkg/inst/profile/shell.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/708c63bb/R/pkg/inst/profile/shell.R
--
diff --git a/R/pkg/inst/profile/shell.R b/R/pkg/inst/profile/shell.R
index ca94f1d..773b6ec 100644
--- a/R/pkg/inst/profile/shell.R
+++ b/R/pkg/inst/profile/shell.R
@@ -24,7 +24,7 @@
   old <- getOption("defaultPackages")
   options(defaultPackages = c(old, "SparkR"))

-  sc <- SparkR::sparkR.init(Sys.getenv("MASTER", unset = ""))
+  sc <- SparkR::sparkR.init()
   assign("sc", sc, envir = .GlobalEnv)
   sqlContext <- SparkR::sparkRSQL.init(sc)
   assign("sqlContext", sqlContext, envir = .GlobalEnv)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
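Previously the shell profile always fed the MASTER environment variable (defaulting to the empty string) into sparkR.init(), which clobbered any --master passed on the command line; calling sparkR.init() with no arguments lets the master chosen by spark-submit take effect. Sketched side by side:

    old_way <- function() SparkR::sparkR.init(Sys.getenv("MASTER", unset = ""))
    new_way <- function() SparkR::sparkR.init()   # defers to spark-submit's --master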
spark git commit: [SPARK-8063] [SPARKR] Spark master URL conflict between MASTER env variable and --master command line option.
Repository: spark Updated Branches: refs/heads/branch-1.4 0a1dad6cd - f67a27d02 [SPARK-8063] [SPARKR] Spark master URL conflict between MASTER env variable and --master command line option. Author: Sun Rui rui@intel.com Closes #6605 from sun-rui/SPARK-8063 and squashes the following commits: 51ca48b [Sun Rui] [SPARK-8063][SPARKR] Spark master URL conflict between MASTER env variable and --master command line option. (cherry picked from commit 708c63bbbe9580eb774fe47e23ef61338103afda) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f67a27d0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f67a27d0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f67a27d0 Branch: refs/heads/branch-1.4 Commit: f67a27d02699af24d5a2ccb843954a643a7ba078 Parents: 0a1dad6 Author: Sun Rui rui@intel.com Authored: Wed Jun 3 11:56:35 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Jun 3 11:57:00 2015 -0700 -- R/pkg/inst/profile/shell.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f67a27d0/R/pkg/inst/profile/shell.R -- diff --git a/R/pkg/inst/profile/shell.R b/R/pkg/inst/profile/shell.R index ca94f1d..773b6ec 100644 --- a/R/pkg/inst/profile/shell.R +++ b/R/pkg/inst/profile/shell.R @@ -24,7 +24,7 @@ old - getOption(defaultPackages) options(defaultPackages = c(old, SparkR)) - sc - SparkR::sparkR.init(Sys.getenv(MASTER, unset = )) + sc - SparkR::sparkR.init() assign(sc, sc, envir=.GlobalEnv) sqlContext - SparkR::sparkRSQL.init(sc) assign(sqlContext, sqlContext, envir=.GlobalEnv) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8084] [SPARKR] Make SparkR scripts fail on error
Repository: spark Updated Branches: refs/heads/branch-1.4 16748694b - c2c129073 [SPARK-8084] [SPARKR] Make SparkR scripts fail on error cc shaneknapp pwendell JoshRosen Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6623 from shivaram/SPARK-8084 and squashes the following commits: 0ec5b26 [Shivaram Venkataraman] Make SparkR scripts fail on error (cherry picked from commit 0576c3c4ff9d9bbff208e915bee1ac0d4956548c) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c2c12907 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c2c12907 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c2c12907 Branch: refs/heads/branch-1.4 Commit: c2c129073f97de5c35532177b0811ff0892429b2 Parents: 1674869 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Wed Jun 3 17:02:16 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Jun 3 17:02:29 2015 -0700 -- R/create-docs.sh | 3 +++ R/install-dev.sh | 2 ++ 2 files changed, 5 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c2c12907/R/create-docs.sh -- diff --git a/R/create-docs.sh b/R/create-docs.sh index 4194172..af47c08 100755 --- a/R/create-docs.sh +++ b/R/create-docs.sh @@ -23,6 +23,9 @@ # After running this script the html docs can be found in # $SPARK_HOME/R/pkg/html +set -o pipefail +set -e + # Figure out where the script is export FWDIR=$(cd `dirname $0`; pwd) pushd $FWDIR http://git-wip-us.apache.org/repos/asf/spark/blob/c2c12907/R/install-dev.sh -- diff --git a/R/install-dev.sh b/R/install-dev.sh index 55ed6f4..b9e2527 100755 --- a/R/install-dev.sh +++ b/R/install-dev.sh @@ -26,6 +26,8 @@ # NOTE(shivaram): Right now we use $SPARK_HOME/R/lib to be the installation directory # to load the SparkR package on the worker nodes. +set -o pipefail +set -e FWDIR=$(cd `dirname $0`; pwd) LIB_DIR=$FWDIR/lib - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8084] [SPARKR] Make SparkR scripts fail on error
Repository: spark Updated Branches: refs/heads/master 51898b515 - 0576c3c4f [SPARK-8084] [SPARKR] Make SparkR scripts fail on error cc shaneknapp pwendell JoshRosen Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6623 from shivaram/SPARK-8084 and squashes the following commits: 0ec5b26 [Shivaram Venkataraman] Make SparkR scripts fail on error Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0576c3c4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0576c3c4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0576c3c4 Branch: refs/heads/master Commit: 0576c3c4ff9d9bbff208e915bee1ac0d4956548c Parents: 51898b5 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Wed Jun 3 17:02:16 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Jun 3 17:02:16 2015 -0700 -- R/create-docs.sh | 3 +++ R/install-dev.sh | 2 ++ 2 files changed, 5 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0576c3c4/R/create-docs.sh -- diff --git a/R/create-docs.sh b/R/create-docs.sh index 4194172..af47c08 100755 --- a/R/create-docs.sh +++ b/R/create-docs.sh @@ -23,6 +23,9 @@ # After running this script the html docs can be found in # $SPARK_HOME/R/pkg/html +set -o pipefail +set -e + # Figure out where the script is export FWDIR=$(cd `dirname $0`; pwd) pushd $FWDIR http://git-wip-us.apache.org/repos/asf/spark/blob/0576c3c4/R/install-dev.sh -- diff --git a/R/install-dev.sh b/R/install-dev.sh index 55ed6f4..b9e2527 100755 --- a/R/install-dev.sh +++ b/R/install-dev.sh @@ -26,6 +26,8 @@ # NOTE(shivaram): Right now we use $SPARK_HOME/R/lib to be the installation directory # to load the SparkR package on the worker nodes. +set -o pipefail +set -e FWDIR=$(cd `dirname $0`; pwd) LIB_DIR=$FWDIR/lib - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-6820] [SPARKR] Convert NAs to null type in SparkR DataFrames
Repository: spark
Updated Branches:
  refs/heads/master 82870d507 -> a5c52c1a3

[SPARK-6820] [SPARKR] Convert NAs to null type in SparkR DataFrames

Author: hqzizania qian.hu...@intel.com

Closes #6190 from hqzizania/R and squashes the following commits:

1641f9e [hqzizania] fixes and add test units
bb3411a [hqzizania] Convert NAs to null type in SparkR DataFrames

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a5c52c1a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a5c52c1a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a5c52c1a

Branch: refs/heads/master
Commit: a5c52c1a3488b69bec19e460d2d1fdb0c9ada58d
Parents: 82870d5
Author: hqzizania qian.hu...@intel.com
Authored: Mon Jun 8 21:40:12 2015 -0700
Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu
Committed: Mon Jun 8 21:40:12 2015 -0700

--
 R/pkg/R/serialize.R              |  8 ++++++
 R/pkg/inst/tests/test_sparkSQL.R | 37 +++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/a5c52c1a/R/pkg/R/serialize.R
--
diff --git a/R/pkg/R/serialize.R b/R/pkg/R/serialize.R
index 2081786..3169d79 100644
--- a/R/pkg/R/serialize.R
+++ b/R/pkg/R/serialize.R
@@ -37,6 +37,14 @@ writeObject <- function(con, object, writeType = TRUE) {
   # passing in vectors as arrays and instead require arrays to be passed
   # as lists.
   type <- class(object)[[1]]  # class of POSIXlt is c("POSIXlt", "POSIXt")
+  # Checking types is needed here, since `is.na` only handles atomic vectors,
+  # lists and pairlists
+  if (type %in% c("integer", "character", "logical", "double", "numeric")) {
+    if (is.na(object)) {
+      object <- NULL
+      type <- NULL
+    }
+  }
   if (writeType) {
     writeType(con, type)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/a5c52c1a/R/pkg/inst/tests/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 30edfc8..8946348 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -101,6 +101,43 @@ test_that("create DataFrame from RDD", {
   expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
 })

+test_that("convert NAs to null type in DataFrames", {
+  rdd <- parallelize(sc, list(list(1L, 2L), list(NA, 4L)))
+  df <- createDataFrame(sqlContext, rdd, list("a", "b"))
+  expect_true(is.na(collect(df)[2, "a"]))
+  expect_equal(collect(df)[2, "b"], 4L)
+
+  l <- data.frame(x = 1L, y = c(1L, NA_integer_, 3L))
+  df <- createDataFrame(sqlContext, l)
+  expect_equal(collect(df)[2, "x"], 1L)
+  expect_true(is.na(collect(df)[2, "y"]))
+
+  rdd <- parallelize(sc, list(list(1, 2), list(NA, 4)))
+  df <- createDataFrame(sqlContext, rdd, list("a", "b"))
+  expect_true(is.na(collect(df)[2, "a"]))
+  expect_equal(collect(df)[2, "b"], 4)
+
+  l <- data.frame(x = 1, y = c(1, NA_real_, 3))
+  df <- createDataFrame(sqlContext, l)
+  expect_equal(collect(df)[2, "x"], 1)
+  expect_true(is.na(collect(df)[2, "y"]))
+
+  l <- list("a", "b", NA, "d")
+  df <- createDataFrame(sqlContext, l)
+  expect_true(is.na(collect(df)[3, "_1"]))
+  expect_equal(collect(df)[4, "_1"], "d")
+
+  l <- list("a", "b", NA_character_, "d")
+  df <- createDataFrame(sqlContext, l)
+  expect_true(is.na(collect(df)[3, "_1"]))
+  expect_equal(collect(df)[4, "_1"], "d")
+
+  l <- list(TRUE, FALSE, NA, TRUE)
+  df <- createDataFrame(sqlContext, l)
+  expect_true(is.na(collect(df)[3, "_1"]))
+  expect_equal(collect(df)[4, "_1"], TRUE)
+})
+
 test_that("toDF", {
   rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) })
   df <- toDF(rdd, list("a", "b"))

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail:
commits-h...@spark.apache.org
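For readers following along, a minimal sketch of the round trip this patch fixes, mirroring the second unit test above (SparkR 1.4-era API; a running local context is assumed):

```r
library(SparkR)
sc <- sparkR.init()
sqlContext <- sparkRSQL.init(sc)

# y carries an NA in its second element
l <- data.frame(x = 1L, y = c(1L, NA_integer_, 3L))
df <- createDataFrame(sqlContext, l)

# The NA is serialized as a SQL null and comes back as NA after collect()
local <- collect(df)
is.na(local[2, "y"])  # TRUE
local[2, "x"]         # 1L
```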
spark git commit: [SPARK-8085] [SPARKR] Support user-specified schema in read.df
Repository: spark Updated Branches: refs/heads/branch-1.4 0ef2e9d35 - 3e3151e75 [SPARK-8085] [SPARKR] Support user-specified schema in read.df cc davies sun-rui Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6620 from shivaram/sparkr-read-schema and squashes the following commits: 16a6726 [Shivaram Venkataraman] Fix loadDF to pass schema Also add a unit test a229877 [Shivaram Venkataraman] Use wrapper function to DataFrameReader ee70ba8 [Shivaram Venkataraman] Support user-specified schema in read.df (cherry picked from commit 12f5eaeee1235850a076ce5716d069bd2f1205a5) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3e3151e7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3e3151e7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3e3151e7 Branch: refs/heads/branch-1.4 Commit: 3e3151e755dd68aa9a75188d6ecb968c7c1dff24 Parents: 0ef2e9d Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Fri Jun 5 10:19:03 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jun 5 10:19:15 2015 -0700 -- R/pkg/R/SQLContext.R | 14 ++ R/pkg/inst/tests/test_sparkSQL.R | 13 + .../scala/org/apache/spark/sql/api/r/SQLUtils.scala | 15 +++ 3 files changed, 38 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3e3151e7/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 88e1a50..22a4b5b 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -452,7 +452,7 @@ dropTempTable - function(sqlContext, tableName) { #' df - read.df(sqlContext, path/to/file.json, source = json) #' } -read.df - function(sqlContext, path = NULL, source = NULL, ...) { +read.df - function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) { options - varargsToEnv(...) if (!is.null(path)) { options[['path']] - path @@ -462,15 +462,21 @@ read.df - function(sqlContext, path = NULL, source = NULL, ...) { source - callJMethod(sqlContext, getConf, spark.sql.sources.default, org.apache.spark.sql.parquet) } - sdf - callJMethod(sqlContext, load, source, options) + if (!is.null(schema)) { +stopifnot(class(schema) == structType) +sdf - callJStatic(org.apache.spark.sql.api.r.SQLUtils, loadDF, sqlContext, source, + schema$jobj, options) + } else { +sdf - callJStatic(org.apache.spark.sql.api.r.SQLUtils, loadDF, sqlContext, source, options) + } dataFrame(sdf) } #' @aliases loadDF #' @export -loadDF - function(sqlContext, path = NULL, source = NULL, ...) { - read.df(sqlContext, path, source, ...) +loadDF - function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) { + read.df(sqlContext, path, source, schema, ...) 
} #' Create an external table http://git-wip-us.apache.org/repos/asf/spark/blob/3e3151e7/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index d2d82e7..30edfc8 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -504,6 +504,19 @@ test_that(read.df() from json file, { df - read.df(sqlContext, jsonPath, json) expect_true(inherits(df, DataFrame)) expect_true(count(df) == 3) + + # Check if we can apply a user defined schema + schema - structType(structField(name, type = string), + structField(age, type = double)) + + df1 - read.df(sqlContext, jsonPath, json, schema) + expect_true(inherits(df1, DataFrame)) + expect_equal(dtypes(df1), list(c(name, string), c(age, double))) + + # Run the same with loadDF + df2 - loadDF(sqlContext, jsonPath, json, schema) + expect_true(inherits(df2, DataFrame)) + expect_equal(dtypes(df2), list(c(name, string), c(age, double))) }) test_that(write.df() as parquet file, { http://git-wip-us.apache.org/repos/asf/spark/blob/3e3151e7/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index 604f312..43b62f0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -139,4 +139,19 @@ private[r] object SQLUtils { case ignore = SaveMode.Ignore } } + + def loadDF( + sqlContext: SQLContext
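A short usage sketch of the new argument, mirroring the unit test above; jsonPath stands in for any JSON file holding name/age records:

```r
# Supply an explicit schema instead of relying on inference
schema <- structType(structField("name", type = "string"),
                     structField("age", type = "double"))

df <- read.df(sqlContext, jsonPath, "json", schema)
dtypes(df)  # list(c("name", "string"), c("age", "double"))

# loadDF stays as an alias and now forwards the schema too
df2 <- loadDF(sqlContext, jsonPath, "json", schema)
```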
spark git commit: [SPARK-8027] [SPARKR] Move man pages creation to install-dev.sh
Repository: spark Updated Branches: refs/heads/branch-1.4 81ff7a901 - 0b71b851d [SPARK-8027] [SPARKR] Move man pages creation to install-dev.sh This also helps us get rid of the sparkr-docs maven profile as docs are now built by just using -Psparkr when the roxygen2 package is available Related to discussion in #6567 cc pwendell srowen -- Let me know if this looks better Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6593 from shivaram/sparkr-pom-cleanup and squashes the following commits: b282241 [Shivaram Venkataraman] Remove sparkr-docs from release script as well 8f100a5 [Shivaram Venkataraman] Move man pages creation to install-dev.sh This also helps us get rid of the sparkr-docs maven profile as docs are now built by just using -Psparkr when the roxygen2 package is available (cherry picked from commit 3dc005282a694e105f40e429b28b0a677743341f) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0b71b851 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0b71b851 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0b71b851 Branch: refs/heads/branch-1.4 Commit: 0b71b851de8a1f97fe764b668337474661ee014e Parents: 81ff7a9 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Thu Jun 4 12:52:16 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Thu Jun 4 12:52:45 2015 -0700 -- R/create-docs.sh | 5 + R/install-dev.sh | 9 - core/pom.xml | 23 --- dev/create-release/create-release.sh | 16 4 files changed, 17 insertions(+), 36 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0b71b851/R/create-docs.sh -- diff --git a/R/create-docs.sh b/R/create-docs.sh index af47c08..6a4687b 100755 --- a/R/create-docs.sh +++ b/R/create-docs.sh @@ -30,10 +30,7 @@ set -e export FWDIR=$(cd `dirname $0`; pwd) pushd $FWDIR -# Generate Rd file -Rscript -e 'library(devtools); devtools::document(pkg=./pkg, roclets=c(rd))' - -# Install the package +# Install the package (this will also generate the Rd files) ./install-dev.sh # Now create HTML files http://git-wip-us.apache.org/repos/asf/spark/blob/0b71b851/R/install-dev.sh -- diff --git a/R/install-dev.sh b/R/install-dev.sh index b9e2527..1edd551 100755 --- a/R/install-dev.sh +++ b/R/install-dev.sh @@ -34,5 +34,12 @@ LIB_DIR=$FWDIR/lib mkdir -p $LIB_DIR -# Install R +pushd $FWDIR + +# Generate Rd files if devtools is installed +Rscript -e ' if(devtools %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg=./pkg, roclets=c(rd)) }' + +# Install SparkR to $LIB_DIR R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/ + +popd http://git-wip-us.apache.org/repos/asf/spark/blob/0b71b851/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index a021842..5c02be8 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -481,29 +481,6 @@ /plugins /build /profile -profile - idsparkr-docs/id - build -plugins - plugin -groupIdorg.codehaus.mojo/groupId -artifactIdexec-maven-plugin/artifactId -executions - execution -idsparkr-pkg-docs/id -phasecompile/phase -goals - goalexec/goal -/goals - /execution -/executions -configuration - executable..${path.separator}R${path.separator}create-docs${script.extension}/executable -/configuration - /plugin -/plugins - /build -/profile /profiles /project http://git-wip-us.apache.org/repos/asf/spark/blob/0b71b851/dev/create-release/create-release.sh -- diff --git a/dev/create-release/create-release.sh 
b/dev/create-release/create-release.sh index 0b14a61..54274a8 100755 --- a/dev/create-release/create-release.sh +++ b/dev/create-release/create-release.sh @@ -228,14 +228,14 @@ if [[ ! $@ =~ --skip-package ]]; then # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds # share the same Zinc server. - make_binary_release hadoop1 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive-thriftserver 3030 - make_binary_release hadoop1-scala2.11 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Dscala-2.11 3031 - make_binary_release cdh4 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive
spark git commit: [SPARK-8027] [SPARKR] Move man pages creation to install-dev.sh
Repository: spark Updated Branches: refs/heads/master cd3176bd8 - 3dc005282 [SPARK-8027] [SPARKR] Move man pages creation to install-dev.sh This also helps us get rid of the sparkr-docs maven profile as docs are now built by just using -Psparkr when the roxygen2 package is available Related to discussion in #6567 cc pwendell srowen -- Let me know if this looks better Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6593 from shivaram/sparkr-pom-cleanup and squashes the following commits: b282241 [Shivaram Venkataraman] Remove sparkr-docs from release script as well 8f100a5 [Shivaram Venkataraman] Move man pages creation to install-dev.sh This also helps us get rid of the sparkr-docs maven profile as docs are now built by just using -Psparkr when the roxygen2 package is available Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3dc00528 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3dc00528 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3dc00528 Branch: refs/heads/master Commit: 3dc005282a694e105f40e429b28b0a677743341f Parents: cd3176b Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Thu Jun 4 12:52:16 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Thu Jun 4 12:52:16 2015 -0700 -- R/create-docs.sh | 5 + R/install-dev.sh | 9 - core/pom.xml | 23 --- dev/create-release/create-release.sh | 16 4 files changed, 17 insertions(+), 36 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3dc00528/R/create-docs.sh -- diff --git a/R/create-docs.sh b/R/create-docs.sh index af47c08..6a4687b 100755 --- a/R/create-docs.sh +++ b/R/create-docs.sh @@ -30,10 +30,7 @@ set -e export FWDIR=$(cd `dirname $0`; pwd) pushd $FWDIR -# Generate Rd file -Rscript -e 'library(devtools); devtools::document(pkg=./pkg, roclets=c(rd))' - -# Install the package +# Install the package (this will also generate the Rd files) ./install-dev.sh # Now create HTML files http://git-wip-us.apache.org/repos/asf/spark/blob/3dc00528/R/install-dev.sh -- diff --git a/R/install-dev.sh b/R/install-dev.sh index b9e2527..1edd551 100755 --- a/R/install-dev.sh +++ b/R/install-dev.sh @@ -34,5 +34,12 @@ LIB_DIR=$FWDIR/lib mkdir -p $LIB_DIR -# Install R +pushd $FWDIR + +# Generate Rd files if devtools is installed +Rscript -e ' if(devtools %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg=./pkg, roclets=c(rd)) }' + +# Install SparkR to $LIB_DIR R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/ + +popd http://git-wip-us.apache.org/repos/asf/spark/blob/3dc00528/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index e35694e..40a64be 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -481,29 +481,6 @@ /plugins /build /profile -profile - idsparkr-docs/id - build -plugins - plugin -groupIdorg.codehaus.mojo/groupId -artifactIdexec-maven-plugin/artifactId -executions - execution -idsparkr-pkg-docs/id -phasecompile/phase -goals - goalexec/goal -/goals - /execution -/executions -configuration - executable..${path.separator}R${path.separator}create-docs${script.extension}/executable -/configuration - /plugin -/plugins - /build -/profile /profiles /project http://git-wip-us.apache.org/repos/asf/spark/blob/3dc00528/dev/create-release/create-release.sh -- diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh index 0b14a61..54274a8 100755 --- a/dev/create-release/create-release.sh +++ b/dev/create-release/create-release.sh @@ -228,14 
+228,14 @@ if [[ ! $@ =~ --skip-package ]]; then # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds # share the same Zinc server. - make_binary_release hadoop1 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive-thriftserver 3030 - make_binary_release hadoop1-scala2.11 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Dscala-2.11 3031 - make_binary_release cdh4 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0 3032 - make_binary_release hadoop2.3 -Psparkr -Psparkr-docs -Phadoop-2.3 -Phive -Phive
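The Rscript one-liner added to install-dev.sh in both copies of this diff lost its shell quoting in the archive; unrolled and with quoting restored, the R it runs is:

```r
# Generate Rd files only when devtools is available, so a plain
# R CMD INSTALL still succeeds on machines without devtools/roxygen2
if ("devtools" %in% rownames(installed.packages())) {
  library(devtools)
  devtools::document(pkg = "./pkg", roclets = c("rd"))
}
```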
spark git commit: [SPARK-8482] Added M4 instances to the list.
Repository: spark Updated Branches: refs/heads/master 42a1f716f - ba8a4537f [SPARK-8482] Added M4 instances to the list. AWS recently added M4 instances (https://aws.amazon.com/blogs/aws/the-new-m4-instance-type-bonus-price-reduction-on-m3-c4/). Author: Pradeep Chhetri pradeep.chhetr...@gmail.com Closes #6899 from pradeepchhetri/master and squashes the following commits: 4f4ea79 [Pradeep Chhetri] Added t2.large instance 3d2bb6c [Pradeep Chhetri] Added M4 instances to the list Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ba8a4537 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ba8a4537 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ba8a4537 Branch: refs/heads/master Commit: ba8a4537fee7d85f968cccf8d1c607731daae307 Parents: 42a1f71 Author: Pradeep Chhetri pradeep.chhetr...@gmail.com Authored: Mon Jun 22 11:45:31 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Mon Jun 22 11:45:31 2015 -0700 -- ec2/spark_ec2.py | 16 ++-- 1 file changed, 14 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ba8a4537/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 1037356..63e2c79 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -362,7 +362,7 @@ def get_validate_spark_version(version, repo): # Source: http://aws.amazon.com/amazon-linux-ami/instance-type-matrix/ -# Last Updated: 2015-05-08 +# Last Updated: 2015-06-19 # For easy maintainability, please keep this manually-inputted dictionary sorted by key. EC2_INSTANCE_TYPES = { c1.medium: pvm, @@ -404,6 +404,11 @@ EC2_INSTANCE_TYPES = { m3.large:hvm, m3.xlarge: hvm, m3.2xlarge: hvm, +m4.large:hvm, +m4.xlarge: hvm, +m4.2xlarge: hvm, +m4.4xlarge: hvm, +m4.10xlarge: hvm, r3.large:hvm, r3.xlarge: hvm, r3.2xlarge: hvm, @@ -413,6 +418,7 @@ EC2_INSTANCE_TYPES = { t2.micro:hvm, t2.small:hvm, t2.medium: hvm, +t2.large:hvm, } @@ -923,7 +929,7 @@ def wait_for_cluster_state(conn, opts, cluster_instances, cluster_state): # Get number of local disks available for a given EC2 instance type. def get_num_disks(instance_type): # Source: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html -# Last Updated: 2015-05-08 +# Last Updated: 2015-06-19 # For easy maintainability, please keep this manually-inputted dictionary sorted by key. disks_by_instance = { c1.medium: 1, @@ -965,6 +971,11 @@ def get_num_disks(instance_type): m3.large:1, m3.xlarge: 2, m3.2xlarge: 2, +m4.large:0, +m4.xlarge: 0, +m4.2xlarge: 0, +m4.4xlarge: 0, +m4.10xlarge: 0, r3.large:1, r3.xlarge: 1, r3.2xlarge: 1, @@ -974,6 +985,7 @@ def get_num_disks(instance_type): t2.micro:0, t2.small:0, t2.medium: 0, +t2.large:0, } if instance_type in disks_by_instance: return disks_by_instance[instance_type] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8537] [SPARKR] Add a validation rule about the curly braces in SparkR to `.lintr`
Repository: spark Updated Branches: refs/heads/master afe35f051 - b1f3a489e [SPARK-8537] [SPARKR] Add a validation rule about the curly braces in SparkR to `.lintr` [[SPARK-8537] Add a validation rule about the curly braces in SparkR to `.lintr` - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-8537) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #6940 from yu-iskw/SPARK-8537 and squashes the following commits: 7eec1a0 [Yu ISHIKAWA] [SPARK-8537][SparkR] Add a validation rule about the curly braces in SparkR to `.lintr` Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b1f3a489 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b1f3a489 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b1f3a489 Branch: refs/heads/master Commit: b1f3a489efc6f4f9d172344c3345b9b38ae235e0 Parents: afe35f0 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Mon Jun 22 14:35:38 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Mon Jun 22 14:35:38 2015 -0700 -- R/pkg/.lintr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b1f3a489/R/pkg/.lintr -- diff --git a/R/pkg/.lintr b/R/pkg/.lintr index b10ebd3..038236f 100644 --- a/R/pkg/.lintr +++ b/R/pkg/.lintr @@ -1,2 +1,2 @@ -linters: with_defaults(line_length_linter(100), camel_case_linter = NULL) +linters: with_defaults(line_length_linter(100), camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE)) exclusions: list(inst/profile/general.R = 1, inst/profile/shell.R) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
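A hypothetical snippet (not from the patch) illustrating what the two added linters accept and reject:

```r
# Passes: "{" ends the line that opens a block, "}" starts its own line
logWith <- function(prefix) {
  function(msg) {
    cat(prefix, msg, "\n")
  }
}

# Also passes: allow_single_line = TRUE keeps one-line blocks legal
identityFn <- function(x) { x }

# Flagged by open_curly_linter: the opening brace sits alone on its line
# badFn <- function(x)
# {
#   x
# }
```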
spark git commit: [SPARK-8111] [SPARKR] SparkR shell should display Spark logo and version banner on startup.
Repository: spark Updated Branches: refs/heads/master f2022fa0d - f2fb0285a [SPARK-8111] [SPARKR] SparkR shell should display Spark logo and version banner on startup. spark version is taken from the environment variable SPARK_VERSION Author: Alok Singh singhal@Aloks-MacBook-Pro.local Author: Alok Singh sing...@aloks-mbp.usca.ibm.com Closes #6944 from aloknsingh/aloknsingh_spark_jiras and squashes the following commits: ed607bd [Alok Singh] [SPARK-8111][SparkR] As per suggestion, 1) using the version from sparkContext rather than the Sys.env. 2) change Welcome to SparkR! to Welcome to followed by Spark logo and version acd5b85 [Alok Singh] fix the jira SPARK-8111 to add the spark version and logo. Currently spark version is taken from the environment variable SPARK_VERSION Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f2fb0285 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f2fb0285 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f2fb0285 Branch: refs/heads/master Commit: f2fb0285ab6d4225c5350f109dea6c1c017bb491 Parents: f2022fa Author: Alok Singh singhal@Aloks-MacBook-Pro.local Authored: Tue Jun 23 12:47:55 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Jun 23 12:47:55 2015 -0700 -- R/pkg/inst/profile/shell.R | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f2fb0285/R/pkg/inst/profile/shell.R -- diff --git a/R/pkg/inst/profile/shell.R b/R/pkg/inst/profile/shell.R index 773b6ec..7189f1a 100644 --- a/R/pkg/inst/profile/shell.R +++ b/R/pkg/inst/profile/shell.R @@ -27,7 +27,21 @@ sc - SparkR::sparkR.init() assign(sc, sc, envir=.GlobalEnv) sqlContext - SparkR::sparkRSQL.init(sc) + sparkVer - SparkR:::callJMethod(sc, version) assign(sqlContext, sqlContext, envir=.GlobalEnv) - cat(\n Welcome to SparkR!) + cat(\n Welcome to) + cat(\n) + cat( __, \n) + cat( / __/__ ___ _/ /__, \n) + cat( _\\ \\/ _ \\/ _ `/ __/ '_/, \n) + cat( /___/ .__/\\_,_/_/ /_/\\_\\) + if (nchar(sparkVer) == 0) { +cat(\n) + } else { +cat( version , sparkVer, \n) + } + cat(/_/, \n) + cat(\n) + cat(\n Spark context is available as sc, SQL context is available as sqlContext\n) } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
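The version string comes from the JVM-side SparkContext rather than the SPARK_VERSION environment variable; the same call used in the patch works interactively too (callJMethod is a private API, hence the ::: access):

```r
sc <- sparkR.init()
sparkVer <- SparkR:::callJMethod(sc, "version")  # e.g. "1.5.0-SNAPSHOT"
```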
spark git commit: [SPARK-8452] [SPARKR] expose jobGroup API in SparkR
Repository: spark Updated Branches: refs/heads/master 54976e55e - 1fa29c2df [SPARK-8452] [SPARKR] expose jobGroup API in SparkR This pull request adds following methods to SparkR: ```R setJobGroup() cancelJobGroup() clearJobGroup() ``` For each method, the spark context is passed as the first argument. There does not seem to be a good way to test these in R. cc shivaram and davies Author: Hossein hoss...@databricks.com Closes #6889 from falaki/SPARK-8452 and squashes the following commits: 9ce9f1e [Hossein] Added basic tests to verify methods can be called and won't throw errors c706af9 [Hossein] Added examples a2c19af [Hossein] taking spark context as first argument 343ca77 [Hossein] Added setJobGroup, cancelJobGroup and clearJobGroup to SparkR Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1fa29c2d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1fa29c2d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1fa29c2d Branch: refs/heads/master Commit: 1fa29c2df2a7846405eed6b409b8deb5329fa7c1 Parents: 54976e5 Author: Hossein hoss...@databricks.com Authored: Fri Jun 19 15:47:22 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jun 19 15:51:59 2015 -0700 -- R/pkg/NAMESPACE | 5 R/pkg/R/sparkR.R| 44 R/pkg/inst/tests/test_context.R | 7 ++ 3 files changed, 56 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1fa29c2d/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index f9447f6..7f85722 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -10,6 +10,11 @@ export(sparkR.init) export(sparkR.stop) export(print.jobj) +# Job group lifecycle management methods +export(setJobGroup, + clearJobGroup, + cancelJobGroup) + exportClasses(DataFrame) exportMethods(arrange, http://git-wip-us.apache.org/repos/asf/spark/blob/1fa29c2d/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 5ced7c6..2efd4f0 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -278,3 +278,47 @@ sparkRHive.init - function(jsc = NULL) { assign(.sparkRHivesc, hiveCtx, envir = .sparkREnv) hiveCtx } + +#' Assigns a group ID to all the jobs started by this thread until the group ID is set to a +#' different value or cleared. 
+#' +#' @param sc existing spark context +#' @param groupid the ID to be assigned to job groups +#' @param description description for the the job group ID +#' @param interruptOnCancel flag to indicate if the job is interrupted on job cancellation +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' setJobGroup(sc, myJobGroup, My job group description, TRUE) +#'} + +setJobGroup - function(sc, groupId, description, interruptOnCancel) { + callJMethod(sc, setJobGroup, groupId, description, interruptOnCancel) +} + +#' Clear current job group ID and its description +#' +#' @param sc existing spark context +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' clearJobGroup(sc) +#'} + +clearJobGroup - function(sc) { + callJMethod(sc, clearJobGroup) +} + +#' Cancel active jobs for the specified group +#' +#' @param sc existing spark context +#' @param groupId the ID of job group to be cancelled +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' cancelJobGroup(sc, myJobGroup) +#'} + +cancelJobGroup - function(sc, groupId) { + callJMethod(sc, cancelJobGroup, groupId) +} http://git-wip-us.apache.org/repos/asf/spark/blob/1fa29c2d/R/pkg/inst/tests/test_context.R -- diff --git a/R/pkg/inst/tests/test_context.R b/R/pkg/inst/tests/test_context.R index e4aab37..513bbc8 100644 --- a/R/pkg/inst/tests/test_context.R +++ b/R/pkg/inst/tests/test_context.R @@ -48,3 +48,10 @@ test_that(rdd GC across sparkR.stop, { count(rdd3) count(rdd4) }) + +test_that(job group functions can be called, { + sc - sparkR.init() + setJobGroup(sc, groupId, job description, TRUE) + cancelJobGroup(sc, groupId) + clearJobGroup(sc) +}) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
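Pieced together from the roxygen examples above, a typical lifecycle for the three new functions looks like this:

```r
sc <- sparkR.init()

# Tag all jobs started on this thread; TRUE asks Spark to interrupt on cancel
setJobGroup(sc, "myJobGroup", "My job group description", TRUE)

# ... launch SparkR jobs here ...

# Cancel anything still running under the group, then drop the tag
cancelJobGroup(sc, "myJobGroup")
clearJobGroup(sc)
```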
spark git commit: [SPARK-8637] [SPARKR] [HOTFIX] Fix packages argument, sparkSubmitBinName
Repository: spark Updated Branches: refs/heads/branch-1.4 13802163d - 6abb4fc8a [SPARK-8637] [SPARKR] [HOTFIX] Fix packages argument, sparkSubmitBinName cc cafreeman Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #7022 from shivaram/sparkr-init-hotfix and squashes the following commits: 9178d15 [Shivaram Venkataraman] Fix packages argument, sparkSubmitBinName (cherry picked from commit c392a9efabcb1ec2a2c53f001ecdae33c245ba35) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6abb4fc8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6abb4fc8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6abb4fc8 Branch: refs/heads/branch-1.4 Commit: 6abb4fc8a426f2554158802dd93f3223b6e2a304 Parents: 1380216 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Thu Jun 25 10:56:00 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Thu Jun 25 10:56:08 2015 -0700 -- R/pkg/R/client.R | 2 +- R/pkg/R/sparkR.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6abb4fc8/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index cf2e5dd..78c7a30 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -57,7 +57,7 @@ generateSparkSubmitArgs - function(args, sparkHome, jars, sparkSubmitOpts, pack } launchBackend - function(args, sparkHome, jars, sparkSubmitOpts, packages) { - sparkSubmitBin - determineSparkSubmitBin() + sparkSubmitBinName - determineSparkSubmitBin() if (sparkHome != ) { sparkSubmitBin - file.path(sparkHome, bin, sparkSubmitBinName) } else { http://git-wip-us.apache.org/repos/asf/spark/blob/6abb4fc8/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 8f81d56..633b869 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -132,7 +132,7 @@ sparkR.init - function( sparkHome = sparkHome, jars = jars, sparkSubmitOpts = Sys.getenv(SPARKR_SUBMIT_ARGS, sparkr-shell), -sparkPackages = sparkPackages) +packages = sparkPackages) # wait atmost 100 seconds for JVM to launch wait - 0.1 for (i in 1:25) { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8637] [SPARKR] [HOTFIX] Fix packages argument, sparkSubmitBinName
Repository: spark Updated Branches: refs/heads/master 2519dcc33 - c392a9efa [SPARK-8637] [SPARKR] [HOTFIX] Fix packages argument, sparkSubmitBinName cc cafreeman Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #7022 from shivaram/sparkr-init-hotfix and squashes the following commits: 9178d15 [Shivaram Venkataraman] Fix packages argument, sparkSubmitBinName Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c392a9ef Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c392a9ef Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c392a9ef Branch: refs/heads/master Commit: c392a9efabcb1ec2a2c53f001ecdae33c245ba35 Parents: 2519dcc Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Thu Jun 25 10:56:00 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Thu Jun 25 10:56:00 2015 -0700 -- R/pkg/R/client.R | 2 +- R/pkg/R/sparkR.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c392a9ef/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index cf2e5dd..78c7a30 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -57,7 +57,7 @@ generateSparkSubmitArgs - function(args, sparkHome, jars, sparkSubmitOpts, pack } launchBackend - function(args, sparkHome, jars, sparkSubmitOpts, packages) { - sparkSubmitBin - determineSparkSubmitBin() + sparkSubmitBinName - determineSparkSubmitBin() if (sparkHome != ) { sparkSubmitBin - file.path(sparkHome, bin, sparkSubmitBinName) } else { http://git-wip-us.apache.org/repos/asf/spark/blob/c392a9ef/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 8f81d56..633b869 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -132,7 +132,7 @@ sparkR.init - function( sparkHome = sparkHome, jars = jars, sparkSubmitOpts = Sys.getenv(SPARKR_SUBMIT_ARGS, sparkr-shell), -sparkPackages = sparkPackages) +packages = sparkPackages) # wait atmost 100 seconds for JVM to launch wait - 0.1 for (i in 1:25) { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8429] [EC2] Add ability to set additional tags
Repository: spark Updated Branches: refs/heads/master 0818fdec3 - 42a1f716f [SPARK-8429] [EC2] Add ability to set additional tags Add the `--additional-tags` parameter that allows to set additional tags to all the created instances (masters and slaves). The user can specify multiple tags by separating them with a comma (`,`), while each tag name and value should be separated by a colon (`:`); for example, `Task:MySparkProject,Env:production` would add two tags, `Task` and `Env`, with the given values. Author: Stefano Parmesan s.parme...@gmail.com Closes #6857 from armisael/patch-1 and squashes the following commits: c5ac92c [Stefano Parmesan] python style (pep8) 8e614f1 [Stefano Parmesan] Set multiple tags in a single request bfc56af [Stefano Parmesan] Address SPARK-7900 by inceasing sleep time daf8615 [Stefano Parmesan] Add ability to set additional tags Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/42a1f716 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/42a1f716 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/42a1f716 Branch: refs/heads/master Commit: 42a1f716fa35533507784be5e9117a984a03e62d Parents: 0818fde Author: Stefano Parmesan s.parme...@gmail.com Authored: Mon Jun 22 11:43:10 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Mon Jun 22 11:43:10 2015 -0700 -- ec2/spark_ec2.py | 28 1 file changed, 20 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/42a1f716/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 5608749..1037356 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -290,6 +290,10 @@ def parse_args(): --additional-security-group, type=string, default=, help=Additional security group to place the machines in) parser.add_option( +--additional-tags, type=string, default=, +help=Additional tags to set on the machines; tags are comma-separated, while name and + + value are colon separated; ex: \Task:MySparkProject,Env:production\) +parser.add_option( --copy-aws-credentials, action=store_true, default=False, help=Add AWS credentials to hadoop configuration to allow Spark to access S3) parser.add_option( @@ -684,16 +688,24 @@ def launch_cluster(conn, opts, cluster_name): # This wait time corresponds to SPARK-4983 print(Waiting for AWS to propagate instance metadata...) -time.sleep(5) -# Give the instances descriptive names +time.sleep(15) + +# Give the instances descriptive names and set additional tags +additional_tags = {} +if opts.additional_tags.strip(): +additional_tags = dict( +map(str.strip, tag.split(':', 1)) for tag in opts.additional_tags.split(',') +) + for master in master_nodes: -master.add_tag( -key='Name', -value='{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id)) +master.add_tags( +dict(additional_tags, Name='{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id)) +) + for slave in slave_nodes: -slave.add_tag( -key='Name', -value='{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id)) +slave.add_tags( +dict(additional_tags, Name='{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id)) +) # Return all the instances return (master_nodes, slave_nodes) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8548] [SPARKR] Remove the trailing whitespaces from the SparkR files
Repository: spark Updated Branches: refs/heads/branch-1.4 d73900a90 - 250179485 [SPARK-8548] [SPARKR] Remove the trailing whitespaces from the SparkR files [[SPARK-8548] Remove the trailing whitespaces from the SparkR files - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-8548) - This is the result of `lint-r` https://gist.github.com/yu-iskw/0019b37a2c1167f33986 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #6945 from yu-iskw/SPARK-8548 and squashes the following commits: 0bd567a [Yu ISHIKAWA] [SPARK-8548][SparkR] Remove the trailing whitespaces from the SparkR files (cherry picked from commit 44fa7df64daa55bd6eb1f2c219a9701b34e1c2a3) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/25017948 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/25017948 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/25017948 Branch: refs/heads/branch-1.4 Commit: 250179485b59f3015fd2f44934b6cb1d3669de80 Parents: d73900a Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Mon Jun 22 20:55:38 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Mon Jun 22 20:55:55 2015 -0700 -- R/pkg/R/DataFrame.R | 96 ++-- R/pkg/R/RDD.R | 48 +++--- R/pkg/R/SQLContext.R| 14 ++-- R/pkg/R/broadcast.R | 6 +- R/pkg/R/deserialize.R | 2 +- R/pkg/R/generics.R | 15 ++--- R/pkg/R/group.R | 1 - R/pkg/R/jobj.R | 2 +- R/pkg/R/pairRDD.R | 4 +- R/pkg/R/schema.R| 2 +- R/pkg/R/serialize.R | 2 +- R/pkg/R/sparkR.R| 6 +- R/pkg/R/utils.R | 48 +++--- R/pkg/R/zzz.R | 1 - R/pkg/inst/tests/test_binaryFile.R | 7 +- R/pkg/inst/tests/test_binary_function.R | 28 R/pkg/inst/tests/test_rdd.R | 12 ++-- R/pkg/inst/tests/test_shuffle.R | 28 R/pkg/inst/tests/test_sparkSQL.R| 28 R/pkg/inst/tests/test_take.R| 1 - R/pkg/inst/tests/test_textFile.R| 7 +- R/pkg/inst/tests/test_utils.R | 12 ++-- 22 files changed, 182 insertions(+), 188 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/25017948/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 0af5cb8..6feabf4 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -38,7 +38,7 @@ setClass(DataFrame, setMethod(initialize, DataFrame, function(.Object, sdf, isCached) { .Object@env - new.env() .Object@env$isCached - isCached - + .Object@sdf - sdf .Object }) @@ -55,11 +55,11 @@ dataFrame - function(sdf, isCached = FALSE) { DataFrame Methods ## #' Print Schema of a DataFrame -#' +#' #' Prints out the schema in tree format -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname printSchema #' @export #' @examples @@ -78,11 +78,11 @@ setMethod(printSchema, }) #' Get schema object -#' +#' #' Returns the schema of this DataFrame as a structType object. -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname schema #' @export #' @examples @@ -100,9 +100,9 @@ setMethod(schema, }) #' Explain -#' +#' #' Print the logical and physical Catalyst plans to the console for debugging. -#' +#' #' @param x A SparkSQL DataFrame #' @param extended Logical. If extended is False, explain() only prints the physical plan. 
#' @rdname explain @@ -200,11 +200,11 @@ setMethod(show, DataFrame, }) #' DataTypes -#' +#' #' Return all column names and their data types as a list -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname dtypes #' @export #' @examples @@ -224,11 +224,11 @@ setMethod(dtypes, }) #' Column names -#' +#' #' Return all column names as a list -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname columns #' @export #' @examples @@ -256,12 +256,12 @@ setMethod(names, }) #' Register Temporary Table -#' +#' #' Registers a DataFrame as a Temporary Table in the SQLContext -#' +#' #' @param x A SparkSQL DataFrame #' @param tableName A character vector containing the name of the table -#' +#' #' @rdname registerTempTable #' @export #' @examples @@ -306,11 +306,11 @@ setMethod(insertInto, }) #' Cache -#' +#' #' Persist with the default storage level (MEMORY_ONLY). -#' +#' #' @param x
spark git commit: [SPARK-8548] [SPARKR] Remove the trailing whitespaces from the SparkR files
Repository: spark Updated Branches: refs/heads/master c4d234396 - 44fa7df64 [SPARK-8548] [SPARKR] Remove the trailing whitespaces from the SparkR files [[SPARK-8548] Remove the trailing whitespaces from the SparkR files - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-8548) - This is the result of `lint-r` https://gist.github.com/yu-iskw/0019b37a2c1167f33986 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #6945 from yu-iskw/SPARK-8548 and squashes the following commits: 0bd567a [Yu ISHIKAWA] [SPARK-8548][SparkR] Remove the trailing whitespaces from the SparkR files Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/44fa7df6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/44fa7df6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/44fa7df6 Branch: refs/heads/master Commit: 44fa7df64daa55bd6eb1f2c219a9701b34e1c2a3 Parents: c4d2343 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Mon Jun 22 20:55:38 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Mon Jun 22 20:55:38 2015 -0700 -- R/pkg/R/DataFrame.R | 96 ++-- R/pkg/R/RDD.R | 48 +++--- R/pkg/R/SQLContext.R| 14 ++-- R/pkg/R/broadcast.R | 6 +- R/pkg/R/deserialize.R | 2 +- R/pkg/R/generics.R | 15 ++--- R/pkg/R/group.R | 1 - R/pkg/R/jobj.R | 2 +- R/pkg/R/pairRDD.R | 4 +- R/pkg/R/schema.R| 2 +- R/pkg/R/serialize.R | 2 +- R/pkg/R/sparkR.R| 6 +- R/pkg/R/utils.R | 48 +++--- R/pkg/R/zzz.R | 1 - R/pkg/inst/tests/test_binaryFile.R | 7 +- R/pkg/inst/tests/test_binary_function.R | 28 R/pkg/inst/tests/test_rdd.R | 12 ++-- R/pkg/inst/tests/test_shuffle.R | 28 R/pkg/inst/tests/test_sparkSQL.R| 28 R/pkg/inst/tests/test_take.R| 1 - R/pkg/inst/tests/test_textFile.R| 7 +- R/pkg/inst/tests/test_utils.R | 12 ++-- 22 files changed, 182 insertions(+), 188 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 0af5cb8..6feabf4 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -38,7 +38,7 @@ setClass(DataFrame, setMethod(initialize, DataFrame, function(.Object, sdf, isCached) { .Object@env - new.env() .Object@env$isCached - isCached - + .Object@sdf - sdf .Object }) @@ -55,11 +55,11 @@ dataFrame - function(sdf, isCached = FALSE) { DataFrame Methods ## #' Print Schema of a DataFrame -#' +#' #' Prints out the schema in tree format -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname printSchema #' @export #' @examples @@ -78,11 +78,11 @@ setMethod(printSchema, }) #' Get schema object -#' +#' #' Returns the schema of this DataFrame as a structType object. -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname schema #' @export #' @examples @@ -100,9 +100,9 @@ setMethod(schema, }) #' Explain -#' +#' #' Print the logical and physical Catalyst plans to the console for debugging. -#' +#' #' @param x A SparkSQL DataFrame #' @param extended Logical. If extended is False, explain() only prints the physical plan. 
#' @rdname explain @@ -200,11 +200,11 @@ setMethod(show, DataFrame, }) #' DataTypes -#' +#' #' Return all column names and their data types as a list -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname dtypes #' @export #' @examples @@ -224,11 +224,11 @@ setMethod(dtypes, }) #' Column names -#' +#' #' Return all column names as a list -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname columns #' @export #' @examples @@ -256,12 +256,12 @@ setMethod(names, }) #' Register Temporary Table -#' +#' #' Registers a DataFrame as a Temporary Table in the SQLContext -#' +#' #' @param x A SparkSQL DataFrame #' @param tableName A character vector containing the name of the table -#' +#' #' @rdname registerTempTable #' @export #' @examples @@ -306,11 +306,11 @@ setMethod(insertInto, }) #' Cache -#' +#' #' Persist with the default storage level (MEMORY_ONLY). -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname cache-methods #' @export #' @examples @@ -400,7 +400,7 @@ setMethod(repartition
spark git commit: [SPARK-8662] SparkR Update SparkSQL Test
Repository: spark Updated Branches: refs/heads/branch-1.4 6abb4fc8a - 78b31a2a6 [SPARK-8662] SparkR Update SparkSQL Test Test `infer_type` using a more fine-grained approach rather than comparing environments. Since `all.equal`'s behavior has changed in R 3.2, the test became unpassable. JIRA here: https://issues.apache.org/jira/browse/SPARK-8662 Author: cafreeman cfree...@alteryx.com Closes #7045 from cafreeman/R32_Test and squashes the following commits: b97cc52 [cafreeman] Add `checkStructField` utility 3381e5c [cafreeman] Update SparkSQL Test Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/78b31a2a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/78b31a2a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/78b31a2a Branch: refs/heads/branch-1.4 Commit: 78b31a2a630c2178987322d0221aeea183ec565f Parents: 6abb4fc Author: cafreeman cfree...@alteryx.com Authored: Fri Jun 26 10:07:35 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jun 26 10:07:35 2015 -0700 -- R/pkg/inst/tests/test_sparkSQL.R | 15 --- 1 file changed, 12 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/78b31a2a/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index fc7f3f0..52fb7f8 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -19,6 +19,14 @@ library(testthat) context(SparkSQL functions) +# Utility function for easily checking the values of a StructField +checkStructField - function(actual, expectedName, expectedType, expectedNullable) { + expect_equal(class(actual), structField) + expect_equal(actual$name(), expectedName) + expect_equal(actual$dataType.toString(), expectedType) + expect_equal(actual$nullable(), expectedNullable) +} + # Tests for SparkSQL functions in SparkR sc - sparkR.init() @@ -52,9 +60,10 @@ test_that(infer types, { list(type = 'array', elementType = integer, containsNull = TRUE)) expect_equal(infer_type(list(1L, 2L)), list(type = 'array', elementType = integer, containsNull = TRUE)) - expect_equal(infer_type(list(a = 1L, b = 2)), - structType(structField(x = a, type = integer, nullable = TRUE), - structField(x = b, type = string, nullable = TRUE))) + testStruct - infer_type(list(a = 1L, b = 2)) + expect_true(class(testStruct) == structType) + checkStructField(testStruct$fields()[[1]], a, IntegerType, TRUE) + checkStructField(testStruct$fields()[[2]], b, StringType, TRUE) e - new.env() assign(a, 1L, envir = e) expect_equal(infer_type(e), - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8662] SparkR Update SparkSQL Test
Repository: spark Updated Branches: refs/heads/master 41afa1650 - a56516fc9 [SPARK-8662] SparkR Update SparkSQL Test Test `infer_type` using a more fine-grained approach rather than comparing environments. Since `all.equal`'s behavior has changed in R 3.2, the test became unpassable. JIRA here: https://issues.apache.org/jira/browse/SPARK-8662 Author: cafreeman cfree...@alteryx.com Closes #7045 from cafreeman/R32_Test and squashes the following commits: b97cc52 [cafreeman] Add `checkStructField` utility 3381e5c [cafreeman] Update SparkSQL Test (cherry picked from commit 78b31a2a630c2178987322d0221aeea183ec565f) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a56516fc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a56516fc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a56516fc Branch: refs/heads/master Commit: a56516fc9280724db8fdef8e7d109ed7e28e427d Parents: 41afa16 Author: cafreeman cfree...@alteryx.com Authored: Fri Jun 26 10:07:35 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jun 26 10:07:49 2015 -0700 -- R/pkg/inst/tests/test_sparkSQL.R | 15 --- 1 file changed, 12 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a56516fc/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 417153d..6a08f89 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -19,6 +19,14 @@ library(testthat) context(SparkSQL functions) +# Utility function for easily checking the values of a StructField +checkStructField - function(actual, expectedName, expectedType, expectedNullable) { + expect_equal(class(actual), structField) + expect_equal(actual$name(), expectedName) + expect_equal(actual$dataType.toString(), expectedType) + expect_equal(actual$nullable(), expectedNullable) +} + # Tests for SparkSQL functions in SparkR sc - sparkR.init() @@ -52,9 +60,10 @@ test_that(infer types, { list(type = 'array', elementType = integer, containsNull = TRUE)) expect_equal(infer_type(list(1L, 2L)), list(type = 'array', elementType = integer, containsNull = TRUE)) - expect_equal(infer_type(list(a = 1L, b = 2)), - structType(structField(x = a, type = integer, nullable = TRUE), - structField(x = b, type = string, nullable = TRUE))) + testStruct - infer_type(list(a = 1L, b = 2)) + expect_true(class(testStruct) == structType) + checkStructField(testStruct$fields()[[1]], a, IntegerType, TRUE) + checkStructField(testStruct$fields()[[2]], b, StringType, TRUE) e - new.env() assign(a, 1L, envir = e) expect_equal(infer_type(e), - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
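The rewrite works because a structField exposes plain accessors whose values are stable to compare, unlike the environment-backed objects that all.equal mishandles under R 3.2. The accessors the new utility calls, shown directly:

```r
s <- structType(structField("a", "integer"), structField("b", "string"))
f <- s$fields()[[1]]
f$name()               # "a"
f$dataType.toString()  # "IntegerType"
f$nullable()           # TRUE
```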
spark git commit: [SPARK-8495] [SPARKR] Add a `.lintr` file to validate the SparkR files and the `lint-r` script
Repository: spark Updated Branches: refs/heads/master 7a3c424ec - 004f57374 [SPARK-8495] [SPARKR] Add a `.lintr` file to validate the SparkR files and the `lint-r` script Thank Shivaram Venkataraman for your support. This is a prototype script to validate the R files. Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #6922 from yu-iskw/SPARK-6813 and squashes the following commits: c1ffe6b [Yu ISHIKAWA] Modify to save result to a log file and add a rule to validate 5520806 [Yu ISHIKAWA] Exclude the .lintr file not to check Apache lincence 8f94680 [Yu ISHIKAWA] [SPARK-8495][SparkR] Add a `.lintr` file to validate the SparkR files and the `lint-r` script Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/004f5737 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/004f5737 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/004f5737 Branch: refs/heads/master Commit: 004f57374b98c4df32d9f1e19221f68e92639a49 Parents: 7a3c424 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Sat Jun 20 16:10:14 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sat Jun 20 16:10:14 2015 -0700 -- .gitignore| 1 + .rat-excludes | 1 + R/pkg/.lintr | 2 ++ dev/lint-r| 30 ++ dev/lint-r.R | 29 + 5 files changed, 63 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/004f5737/.gitignore -- diff --git a/.gitignore b/.gitignore index 3624d12..debad77 100644 --- a/.gitignore +++ b/.gitignore @@ -66,6 +66,7 @@ scalastyle-output.xml R-unit-tests.log R/unit-tests.out python/lib/pyspark.zip +lint-r-report.log # For Hive metastore_db/ http://git-wip-us.apache.org/repos/asf/spark/blob/004f5737/.rat-excludes -- diff --git a/.rat-excludes b/.rat-excludes index aa008e6..c24667c 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -86,3 +86,4 @@ local-1430917381535_2 DESCRIPTION NAMESPACE test_support/* +.lintr http://git-wip-us.apache.org/repos/asf/spark/blob/004f5737/R/pkg/.lintr -- diff --git a/R/pkg/.lintr b/R/pkg/.lintr new file mode 100644 index 000..b10ebd3 --- /dev/null +++ b/R/pkg/.lintr @@ -0,0 +1,2 @@ +linters: with_defaults(line_length_linter(100), camel_case_linter = NULL) +exclusions: list(inst/profile/general.R = 1, inst/profile/shell.R) http://git-wip-us.apache.org/repos/asf/spark/blob/004f5737/dev/lint-r -- diff --git a/dev/lint-r b/dev/lint-r new file mode 100755 index 000..7d5f4cd --- /dev/null +++ b/dev/lint-r @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the License); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +SCRIPT_DIR=$( cd $( dirname $0 ) pwd ) +SPARK_ROOT_DIR=$(dirname $SCRIPT_DIR) +LINT_R_REPORT_FILE_NAME=$SPARK_ROOT_DIR/dev/lint-r-report.log + + +if ! 
type Rscript /dev/null; then + echo ERROR: You should install R + exit +fi + +`which Rscript` --vanilla $SPARK_ROOT_DIR/dev/lint-r.R $SPARK_ROOT_DIR | tee $LINT_R_REPORT_FILE_NAME http://git-wip-us.apache.org/repos/asf/spark/blob/004f5737/dev/lint-r.R -- diff --git a/dev/lint-r.R b/dev/lint-r.R new file mode 100644 index 000..dcb1a18 --- /dev/null +++ b/dev/lint-r.R @@ -0,0 +1,29 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the License); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under
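The body of dev/lint-r.R is cut off above just after its license header; the essential shape of such a driver is only a few lines. A sketch under that assumption (the real script's argument handling and lintr bootstrap may differ):

```r
# Lint the SparkR package rooted at the directory passed by dev/lint-r
argv <- commandArgs(trailingOnly = TRUE)
SPARK_ROOT_DIR <- as.character(argv[1])

library(lintr)
path.to.package <- file.path(SPARK_ROOT_DIR, "R", "pkg")
lint_package(path.to.package)  # R/pkg/.lintr supplies the linter config
```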
spark git commit: [SPARK-8576] Add spark-ec2 options to set IAM roles and instance-initiated shutdown behavior
Repository: spark Updated Branches: refs/heads/master bba6699d0 - 31f48e5af [SPARK-8576] Add spark-ec2 options to set IAM roles and instance-initiated shutdown behavior Both of these options are useful when spark-ec2 is being used as part of an automated pipeline and the engineers want to minimize the need to pass around AWS keys for access to things like S3 (keys are replaced by the IAM role) and to be able to launch a cluster that can terminate itself cleanly. Author: Nicholas Chammas nicholas.cham...@gmail.com Closes #6962 from nchammas/additional-ec2-options and squashes the following commits: fcf252e [Nicholas Chammas] PEP8 fixes efba9ee [Nicholas Chammas] add help for --instance-initiated-shutdown-behavior 598aecf [Nicholas Chammas] option to launch instances into IAM role 2743632 [Nicholas Chammas] add option for instance initiated shutdown Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/31f48e5a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/31f48e5a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/31f48e5a Branch: refs/heads/master Commit: 31f48e5af887a9ccc9cea0218c36bf52bbf49d24 Parents: bba6699 Author: Nicholas Chammas nicholas.cham...@gmail.com Authored: Wed Jun 24 11:20:51 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Jun 24 11:20:51 2015 -0700 -- ec2/spark_ec2.py | 56 --- 1 file changed, 35 insertions(+), 21 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/31f48e5a/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 63e2c79..e4932cf 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -306,6 +306,13 @@ def parse_args(): --private-ips, action=store_true, default=False, help=Use private IPs for instances rather than public if VPC/subnet + requires that.) 
+parser.add_option( +--instance-initiated-shutdown-behavior, default=stop, +choices=[stop, terminate], +help=Whether instances should terminate when shut down or just stop) +parser.add_option( +--instance-profile-name, default=None, +help=IAM profile name to launch instances under) (opts, args) = parser.parse_args() if len(args) != 2: @@ -602,7 +609,8 @@ def launch_cluster(conn, opts, cluster_name): block_device_map=block_map, subnet_id=opts.subnet_id, placement_group=opts.placement_group, -user_data=user_data_content) +user_data=user_data_content, +instance_profile_name=opts.instance_profile_name) my_req_ids += [req.id for req in slave_reqs] i += 1 @@ -647,16 +655,19 @@ def launch_cluster(conn, opts, cluster_name): for zone in zones: num_slaves_this_zone = get_partition(opts.slaves, num_zones, i) if num_slaves_this_zone 0: -slave_res = image.run(key_name=opts.key_pair, - security_group_ids=[slave_group.id] + additional_group_ids, - instance_type=opts.instance_type, - placement=zone, - min_count=num_slaves_this_zone, - max_count=num_slaves_this_zone, - block_device_map=block_map, - subnet_id=opts.subnet_id, - placement_group=opts.placement_group, - user_data=user_data_content) +slave_res = image.run( +key_name=opts.key_pair, +security_group_ids=[slave_group.id] + additional_group_ids, +instance_type=opts.instance_type, +placement=zone, +min_count=num_slaves_this_zone, +max_count=num_slaves_this_zone, +block_device_map=block_map, +subnet_id=opts.subnet_id, +placement_group=opts.placement_group, +user_data=user_data_content, + instance_initiated_shutdown_behavior=opts.instance_initiated_shutdown_behavior, +instance_profile_name=opts.instance_profile_name) slave_nodes += slave_res.instances print(Launched {s} slave{plural_s} in {z}, regid = {r}.format( s=num_slaves_this_zone, @@ -678,16 +689,19 @@ def launch_cluster(conn, opts, cluster_name): master_type = opts.instance_type if opts.zone
spark git commit: [SPARK-8506] Add packages to R context created through init.
Repository: spark Updated Branches: refs/heads/master 1173483f3 - 43e66192f [SPARK-8506] Add pakages to R context created through init. Author: Holden Karau hol...@pigscanfly.ca Closes #6928 from holdenk/SPARK-8506-sparkr-does-not-provide-an-easy-way-to-depend-on-spark-packages-when-performing-init-from-inside-of-r and squashes the following commits: b60dd63 [Holden Karau] Add an example with the spark-csv package fa8bc92 [Holden Karau] typo: sparm - spark 865a90c [Holden Karau] strip spaces for comparision c7a4471 [Holden Karau] Add some documentation c1a9233 [Holden Karau] refactor for testing c818556 [Holden Karau] Add pakages to R Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/43e66192 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/43e66192 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/43e66192 Branch: refs/heads/master Commit: 43e66192f45a23f7232116e9f664158862df5015 Parents: 1173483 Author: Holden Karau hol...@pigscanfly.ca Authored: Wed Jun 24 11:55:20 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Jun 24 11:55:20 2015 -0700 -- R/pkg/R/client.R | 26 +++--- R/pkg/R/sparkR.R | 7 +-- R/pkg/inst/tests/test_client.R | 32 docs/sparkr.md | 17 + 4 files changed, 69 insertions(+), 13 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/43e66192/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index 1281c41..cf2e5dd 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -34,24 +34,36 @@ connectBackend - function(hostname, port, timeout = 6000) { con } -launchBackend - function(args, sparkHome, jars, sparkSubmitOpts) { +determineSparkSubmitBin - function() { if (.Platform$OS.type == unix) { sparkSubmitBinName = spark-submit } else { sparkSubmitBinName = spark-submit.cmd } + sparkSubmitBinName +} + +generateSparkSubmitArgs - function(args, sparkHome, jars, sparkSubmitOpts, packages) { + if (jars != ) { +jars - paste(--jars, jars) + } + + if (packages != ) { +packages - paste(--packages, packages) + } + combinedArgs - paste(jars, packages, sparkSubmitOpts, args, sep = ) + combinedArgs +} + +launchBackend - function(args, sparkHome, jars, sparkSubmitOpts, packages) { + sparkSubmitBin - determineSparkSubmitBin() if (sparkHome != ) { sparkSubmitBin - file.path(sparkHome, bin, sparkSubmitBinName) } else { sparkSubmitBin - sparkSubmitBinName } - - if (jars != ) { -jars - paste(--jars, jars) - } - - combinedArgs - paste(jars, sparkSubmitOpts, args, sep = ) + combinedArgs - generateSparkSubmitArgs(args, sparkHome, jars, sparkSubmitOpts, packages) cat(Launching java with spark-submit command, sparkSubmitBin, combinedArgs, \n) invisible(system2(sparkSubmitBin, combinedArgs, wait = F)) } http://git-wip-us.apache.org/repos/asf/spark/blob/43e66192/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index dbde0c4..8f81d56 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -81,6 +81,7 @@ sparkR.stop - function() { #' @param sparkExecutorEnv Named list of environment variables to be used when launching executors. #' @param sparkJars Character string vector of jar files to pass to the worker nodes. #' @param sparkRLibDir The path where R is installed on the worker nodes. 
+#' @param sparkPackages Character string vector of packages from spark-packages.org
 #' @export
 #' @examples
 #'\dontrun{
@@ -100,7 +101,8 @@ sparkR.init <- function(
   sparkEnvir = list(),
   sparkExecutorEnv = list(),
   sparkJars = "",
-  sparkRLibDir = "") {
+  sparkRLibDir = "",
+  sparkPackages = "") {

   if (exists(".sparkRjsc", envir = .sparkREnv)) {
     cat("Re-using existing Spark Context. Please stop SparkR with sparkR.stop() or restart R to create a new Spark Context\n")
@@ -129,7 +131,8 @@ sparkR.init <- function(
     args = path,
     sparkHome = sparkHome,
     jars = jars,
-    sparkSubmitOpts = Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"))
+    sparkSubmitOpts = Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"),
+    sparkPackages = sparkPackages)

   # wait atmost 100 seconds for JVM to launch
   wait <- 0.1
   for (i in 1:25) {

http://git-wip-us.apache.org/repos/asf/spark/blob/43e66192/R/pkg/inst/tests/test_client.R
--
diff --git a/R/pkg/inst/tests/test_client.R b/R/pkg/inst/tests/test_client.R
new file mode 100644
index 000..30b05c1
--- /dev/null
+++ b/R/pkg/inst/tests/test_client.R
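For readers skimming the diff, a minimal usage sketch of the new argument from the R side. It is hedged: the spark-csv coordinates below are only illustrative (the commit's own example uses that package), and any group:artifact:version published on spark-packages.org would be passed the same way.

library(SparkR)

# Start SparkR with an external Spark package on the classpath.
# "com.databricks:spark-csv_2.10:1.0.3" is an illustrative coordinate string.
sc <- sparkR.init(master = "local[2]",
                  sparkPackages = "com.databricks:spark-csv_2.10:1.0.3")

# Internally, generateSparkSubmitArgs() turns this into a --packages flag for
# spark-submit, mirroring how sparkJars becomes --jars.
sparkR.stop()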
spark git commit: [SPARK-8506] Add packages to R context created through init.
Repository: spark Updated Branches: refs/heads/branch-1.4 7e53ff258 -> f6682dd6e

[SPARK-8506] Add packages to R context created through init.

Author: Holden Karau <hol...@pigscanfly.ca>

Closes #6928 from holdenk/SPARK-8506-sparkr-does-not-provide-an-easy-way-to-depend-on-spark-packages-when-performing-init-from-inside-of-r and squashes the following commits:

b60dd63 [Holden Karau] Add an example with the spark-csv package
fa8bc92 [Holden Karau] typo: sparm - spark
865a90c [Holden Karau] strip spaces for comparision
c7a4471 [Holden Karau] Add some documentation
c1a9233 [Holden Karau] refactor for testing
c818556 [Holden Karau] Add pakages to R

(cherry picked from commit 43e66192f45a23f7232116e9f664158862df5015)
Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f6682dd6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f6682dd6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f6682dd6

Branch: refs/heads/branch-1.4
Commit: f6682dd6e8ab8c5acddd1cf20317bea3afcbcae7
Parents: 7e53ff2
Author: Holden Karau <hol...@pigscanfly.ca>
Authored: Wed Jun 24 11:55:20 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Wed Jun 24 11:55:29 2015 -0700

--
 R/pkg/R/client.R               | 26 +++---
 R/pkg/R/sparkR.R               |  7 +--
 R/pkg/inst/tests/test_client.R | 32 
 docs/sparkr.md                 | 17 +
 4 files changed, 69 insertions(+), 13 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f6682dd6/R/pkg/R/client.R
--
diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R
index 1281c41..cf2e5dd 100644
--- a/R/pkg/R/client.R
+++ b/R/pkg/R/client.R
@@ -34,24 +34,36 @@ connectBackend <- function(hostname, port, timeout = 6000) {
   con
 }

-launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts) {
+determineSparkSubmitBin <- function() {
   if (.Platform$OS.type == "unix") {
     sparkSubmitBinName = "spark-submit"
   } else {
     sparkSubmitBinName = "spark-submit.cmd"
   }
+  sparkSubmitBinName
+}
+
+generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, packages) {
+  if (jars != "") {
+    jars <- paste("--jars", jars)
+  }
+
+  if (packages != "") {
+    packages <- paste("--packages", packages)
+  }
+  combinedArgs <- paste(jars, packages, sparkSubmitOpts, args, sep = " ")
+  combinedArgs
+}
+
+launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) {
+  sparkSubmitBin <- determineSparkSubmitBin()
   if (sparkHome != "") {
     sparkSubmitBin <- file.path(sparkHome, "bin", sparkSubmitBinName)
   } else {
     sparkSubmitBin <- sparkSubmitBinName
   }
-
-  if (jars != "") {
-    jars <- paste("--jars", jars)
-  }
-
-  combinedArgs <- paste(jars, sparkSubmitOpts, args, sep = " ")
+  combinedArgs <- generateSparkSubmitArgs(args, sparkHome, jars, sparkSubmitOpts, packages)
   cat("Launching java with spark-submit command", sparkSubmitBin, combinedArgs, "\n")
   invisible(system2(sparkSubmitBin, combinedArgs, wait = F))
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/f6682dd6/R/pkg/R/sparkR.R
--
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index dbde0c4..8f81d56 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -81,6 +81,7 @@ sparkR.stop <- function() {
 #' @param sparkExecutorEnv Named list of environment variables to be used when launching executors.
 #' @param sparkJars Character string vector of jar files to pass to the worker nodes.
 #' @param sparkRLibDir The path where R is installed on the worker nodes.
+#' @param sparkPackages Character string vector of packages from spark-packages.org
 #' @export
 #' @examples
 #'\dontrun{
@@ -100,7 +101,8 @@ sparkR.init <- function(
   sparkEnvir = list(),
   sparkExecutorEnv = list(),
   sparkJars = "",
-  sparkRLibDir = "") {
+  sparkRLibDir = "",
+  sparkPackages = "") {

   if (exists(".sparkRjsc", envir = .sparkREnv)) {
     cat("Re-using existing Spark Context. Please stop SparkR with sparkR.stop() or restart R to create a new Spark Context\n")
@@ -129,7 +131,8 @@ sparkR.init <- function(
     args = path,
     sparkHome = sparkHome,
     jars = jars,
-    sparkSubmitOpts = Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"))
+    sparkSubmitOpts = Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"),
+    sparkPackages = sparkPackages)

   # wait atmost 100 seconds for JVM to launch
   wait <- 0.1
   for (i in 1:25) {

http://git-wip-us.apache.org/repos/asf/spark/blob/f6682dd6/R/pkg/inst/tests/test_client.R
--
diff --git a/R/pkg/inst/tests/test_client.R b/R/pkg/inst/tests/test_client.R
new file mode 100644
index 000..30b05c1
--- /dev/null
+++ b/R/pkg/inst/tests
spark git commit: [SPARK-8607] SparkR -- jars not being added to application classpath correctly
Repository: spark Updated Branches: refs/heads/master a56516fc9 - 9d1181776 [SPARK-8607] SparkR -- jars not being added to application classpath correctly Add `getStaticClass` method in SparkR's `RBackendHandler` This is a fix for the problem referenced in [SPARK-5185](https://issues.apache.org/jira/browse/SPARK-5185). cc shivaram Author: cafreeman cfree...@alteryx.com Closes #7001 from cafreeman/branch-1.4 and squashes the following commits: 8f81194 [cafreeman] Add missing license 31aedcf [cafreeman] Refactor test to call an external R script 2c22073 [cafreeman] Merge branch 'branch-1.4' of github.com:apache/spark into branch-1.4 0bea809 [cafreeman] Fixed relative path issue and added smaller JAR ee25e60 [cafreeman] Merge branch 'branch-1.4' of github.com:apache/spark into branch-1.4 9a5c362 [cafreeman] test for including JAR when launching sparkContext 9101223 [cafreeman] Merge branch 'branch-1.4' of github.com:apache/spark into branch-1.4 5a80844 [cafreeman] Fix style nits 7c6bd0c [cafreeman] [SPARK-8607] SparkR (cherry picked from commit 2579948bf5d89ac2d822ace605a6a4afce5258d6) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9d118177 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9d118177 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9d118177 Branch: refs/heads/master Commit: 9d11817765e2817b11b73c61bae3b32c9f119cfd Parents: a56516f Author: cafreeman cfree...@alteryx.com Authored: Fri Jun 26 17:06:02 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jun 26 17:06:16 2015 -0700 -- .../inst/test_support/sparktestjar_2.10-1.0.jar | Bin 0 - 2886 bytes R/pkg/inst/tests/jarTest.R | 32 R/pkg/inst/tests/test_includeJAR.R | 37 +++ .../apache/spark/api/r/RBackendHandler.scala| 17 - 4 files changed, 85 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9d118177/R/pkg/inst/test_support/sparktestjar_2.10-1.0.jar -- diff --git a/R/pkg/inst/test_support/sparktestjar_2.10-1.0.jar b/R/pkg/inst/test_support/sparktestjar_2.10-1.0.jar new file mode 100644 index 000..1d5c2af Binary files /dev/null and b/R/pkg/inst/test_support/sparktestjar_2.10-1.0.jar differ http://git-wip-us.apache.org/repos/asf/spark/blob/9d118177/R/pkg/inst/tests/jarTest.R -- diff --git a/R/pkg/inst/tests/jarTest.R b/R/pkg/inst/tests/jarTest.R new file mode 100644 index 000..d68bb20 --- /dev/null +++ b/R/pkg/inst/tests/jarTest.R @@ -0,0 +1,32 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the License); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+library(SparkR)
+
+sc <- sparkR.init()
+
+helloTest <- SparkR:::callJStatic("sparkR.test.hello",
+                                  "helloWorld",
+                                  "Dave")
+
+basicFunction <- SparkR:::callJStatic("sparkR.test.basicFunction",
+                                      "addStuff",
+                                      2L,
+                                      2L)
+
+sparkR.stop()
+output <- c(helloTest, basicFunction)
+writeLines(output)

http://git-wip-us.apache.org/repos/asf/spark/blob/9d118177/R/pkg/inst/tests/test_includeJAR.R
--
diff --git a/R/pkg/inst/tests/test_includeJAR.R b/R/pkg/inst/tests/test_includeJAR.R
new file mode 100644
index 000..8bc693b
--- /dev/null
+++ b/R/pkg/inst/tests/test_includeJAR.R
@@ -0,0 +1,37 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License
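From the user's side, the behavior exercised by the new test looks roughly like the sketch below. The JAR path is illustrative, the sparkR.test.* class names exist only in the test JAR added by this commit, and callJStatic is an internal (triple-colon) helper rather than public API.

library(SparkR)

# Launch SparkR with an extra JAR on the application classpath.
sc <- sparkR.init(sparkJars = "path/to/sparktestjar_2.10-1.0.jar")  # illustrative path

# Call a static method from the bundled JAR, as jarTest.R does above.
greeting <- SparkR:::callJStatic("sparkR.test.hello", "helloWorld", "Dave")

sparkR.stop()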
spark git commit: [SPARK-8607] SparkR -- jars not being added to application classpath correctly
Repository: spark Updated Branches: refs/heads/branch-1.4 78b31a2a6 - 2579948bf [SPARK-8607] SparkR -- jars not being added to application classpath correctly Add `getStaticClass` method in SparkR's `RBackendHandler` This is a fix for the problem referenced in [SPARK-5185](https://issues.apache.org/jira/browse/SPARK-5185). cc shivaram Author: cafreeman cfree...@alteryx.com Closes #7001 from cafreeman/branch-1.4 and squashes the following commits: 8f81194 [cafreeman] Add missing license 31aedcf [cafreeman] Refactor test to call an external R script 2c22073 [cafreeman] Merge branch 'branch-1.4' of github.com:apache/spark into branch-1.4 0bea809 [cafreeman] Fixed relative path issue and added smaller JAR ee25e60 [cafreeman] Merge branch 'branch-1.4' of github.com:apache/spark into branch-1.4 9a5c362 [cafreeman] test for including JAR when launching sparkContext 9101223 [cafreeman] Merge branch 'branch-1.4' of github.com:apache/spark into branch-1.4 5a80844 [cafreeman] Fix style nits 7c6bd0c [cafreeman] [SPARK-8607] SparkR Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2579948b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2579948b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2579948b Branch: refs/heads/branch-1.4 Commit: 2579948bf5d89ac2d822ace605a6a4afce5258d6 Parents: 78b31a2 Author: cafreeman cfree...@alteryx.com Authored: Fri Jun 26 17:06:02 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jun 26 17:06:02 2015 -0700 -- .../inst/test_support/sparktestjar_2.10-1.0.jar | Bin 0 - 2886 bytes R/pkg/inst/tests/jarTest.R | 32 R/pkg/inst/tests/test_includeJAR.R | 37 +++ .../apache/spark/api/r/RBackendHandler.scala| 17 - 4 files changed, 85 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2579948b/R/pkg/inst/test_support/sparktestjar_2.10-1.0.jar -- diff --git a/R/pkg/inst/test_support/sparktestjar_2.10-1.0.jar b/R/pkg/inst/test_support/sparktestjar_2.10-1.0.jar new file mode 100644 index 000..1d5c2af Binary files /dev/null and b/R/pkg/inst/test_support/sparktestjar_2.10-1.0.jar differ http://git-wip-us.apache.org/repos/asf/spark/blob/2579948b/R/pkg/inst/tests/jarTest.R -- diff --git a/R/pkg/inst/tests/jarTest.R b/R/pkg/inst/tests/jarTest.R new file mode 100644 index 000..d68bb20 --- /dev/null +++ b/R/pkg/inst/tests/jarTest.R @@ -0,0 +1,32 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the License); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +library(SparkR) + +sc - sparkR.init() + +helloTest - SparkR:::callJStatic(sparkR.test.hello, + helloWorld, + Dave) + +basicFunction - SparkR:::callJStatic(sparkR.test.basicFunction, + addStuff, + 2L, + 2L) + +sparkR.stop() +output - c(helloTest, basicFunction) +writeLines(output) http://git-wip-us.apache.org/repos/asf/spark/blob/2579948b/R/pkg/inst/tests/test_includeJAR.R -- diff --git a/R/pkg/inst/tests/test_includeJAR.R b/R/pkg/inst/tests/test_includeJAR.R new file mode 100644 index 000..8bc693b --- /dev/null +++ b/R/pkg/inst/tests/test_includeJAR.R @@ -0,0 +1,37 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the License); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software
svn commit: r1684946 - in /spark: releases/_posts/2015-06-11-spark-release-1-4-0.md site/releases/spark-release-1-4-0.html
Author: shivaram Date: Thu Jun 11 16:59:48 2015 New Revision: 1684946 URL: http://svn.apache.org/r1684946 Log: Add some more SparkR contributors Modified: spark/releases/_posts/2015-06-11-spark-release-1-4-0.md spark/site/releases/spark-release-1-4-0.html Modified: spark/releases/_posts/2015-06-11-spark-release-1-4-0.md URL: http://svn.apache.org/viewvc/spark/releases/_posts/2015-06-11-spark-release-1-4-0.md?rev=1684946r1=1684945r2=1684946view=diff == --- spark/releases/_posts/2015-06-11-spark-release-1-4-0.md (original) +++ spark/releases/_posts/2015-06-11-spark-release-1-4-0.md Thu Jun 11 16:59:48 2015 @@ -99,7 +99,9 @@ Thanks to The following organizations, w * Andrew Or -- Bug fixes in Core * Andrew Or -- Improvements in Core and YARN; bug fixes in Core, Web UI, Streaming, tests, and SQL; improvement in Streaming, Web UI, Core, and SQL * Andrey Zagrebin -- Improvement in SQL + * Antonio Piccolboni -- New features in SparkR * Arsenii Krasikov -- Bug fixes in Core + * Ashutosh Raina -- New features in SparkR * Ashwin Shankar -- Bug fixes in YARN * Augustin Borsu -- New features in MLlib * Ben Fradet -- Documentation in Core and Streaming @@ -115,6 +117,7 @@ Thanks to The following organizations, w * Cheng Lian -- Bug fixes in SQL * Cheng Lian -- Improvements in Core and SQL; documentation in Core and SQL; bug fixes in Core and SQL; improvement in SQL * Cheolsoo Park -- Wish in YARN; improvements in Core and spark submit; bug fixes in Core + * Chris Freeman -- New features in SparkR * Chet Mancini -- Improvements in Core and SQL * Chris Heller -- New features in Mesos * Christophe Preaud -- Documentation in Core and YARN @@ -122,23 +125,27 @@ Thanks to The following organizations, w * DB Tsai -- Improvements, new features, and bug fixes in MLlib * DEBORAH SIEGEL -- Documentation in Core * Dan McClary -- New features in GraphX + * Dan Putler -- New features in SparkR * Daoyuan Wang -- Improvements in tests and SQL; new features in SQL; bug fixes in SQL; improvement in MLlib and SQL * David McGuire -- Bug fixes in Streaming - * Davies Liu -- Improvements in SQL and PySpark; new features in Core and sparkr; bug fixes in Streaming, tests, PySpark, sparkr, and SQL; improvement in Core and SQL - * Davies Liu -- New features in sparkr + * Davies Liu -- Improvements in SQL and PySpark; new features in Core and SparkR; bug fixes in Streaming, tests, PySpark, SparkR, and SQL; improvement in Core and SQL + * Davies Liu -- New features in SparkR * Dean Chen -- Improvements in Core; new features in YARN; bug fixes in Core and YARN * Debasish Das -- New features in MLlib * Deborah Siegel -- Improvements in Core * Doing Done -- Improvements in SQL; bug fixes in Core and SQL * Dong Xu -- Bug fixes in SQL * Doug Balog -- Bug fixes in spark submit, YARN, and SQL + * Edward T -- New features in SparkR * Elisey Zanko -- Bug fixes in MLlib and PySpark * Emre Sevinc -- Improvements in Streaming * Eric Chiang -- Documentation in Core * Erik Van Oosten -- Bug fixes in Core * Evan Jones -- Bug fixes in Core * Evan Yu -- Bug fixes in Core + * Evert Lammerts -- New features in SparkR * Favio Vazquez -- Build fixes in Core; documentation in Core and MLlib + * Felix Cheung -- SparkR Documentation * Florian Verhein -- Improvements and new features in EC2 * Gaurav Nanda -- Documentation in Core * Glenn Weidner -- Documentation in MLlib and PySpark @@ -148,9 +155,11 @@ Thanks to The following organizations, w * GuoQiang Li -- New features in Core; bug fixes in Core and YARN * Haiyang Sea -- Improvements in SQL * 
Hangchen Yu -- Documentation in GraphX - * Hao Lin -- Improvements and new features in sparkr + * Hao Lin -- Improvements and new features in SparkR * Hari Shreedharan -- Test in Streaming and tests; new features in YARN; bug fixes in Web UI + * Harihar Nahak -- New features in SparkR * Holden Karau -- Improvements in Core, MLlib, and PySpark; bug fixes in PySpark + * Hossein Falaki -- SparkR Documentation * Hong Shen -- Bug fixes in Core and YARN * Hrishikesh Subramonian -- Improvements in MLlib and PySpark * Hung Lin -- Bug fixes in scheduler @@ -163,7 +172,7 @@ Thanks to The following organizations, w * Jaonary Rabarisoa -- Improvements in MLlib * Jayson Sunshine -- Documentation in Core * Jean Lyn -- Bug fixes in SQL - * Jeff Harrison -- Improvements in sparkr + * Jeff Harrison -- Improvements in SparkR * Jeremy A. Lucas -- Improvements in Streaming * Jeremy Freeman -- Bug fixes in Streaming and MLlib * Jim Carroll -- Bug fixes in MLlib @@ -199,6 +208,7 @@ Thanks to The following organizations, w * Masayoshi TSUZUKI -- Bug fixes in Windows and Core * Matei Zaharia -- Improvement in Web UI * Matt Aasted -- Bug fixes in EC2 + * Matt Massie -- New features in SparkR * Matt Wise -- Documentation in Core * Matthew Cheah
spark git commit: [SPARK-8310] [EC2] Updates the master branch EC2 versions
Repository: spark Updated Branches: refs/heads/master 1191c3efc -> c8d551d54

[SPARK-8310] [EC2] Updates the master branch EC2 versions

Will send another PR for `branch-1.4`

Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>

Closes #6764 from shivaram/SPARK-8310 and squashes the following commits:

d8cd3b3 [Shivaram Venkataraman] This updates the master branch EC2 versions

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8d551d5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8d551d5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8d551d5

Branch: refs/heads/master
Commit: c8d551d546979e126c91925487e30c353185e3ba
Parents: 1191c3e
Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Authored: Thu Jun 11 13:18:42 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Thu Jun 11 13:18:42 2015 -0700

--
 ec2/spark_ec2.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/c8d551d5/ec2/spark_ec2.py
--
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 84629cb..58b24ae 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -51,7 +51,7 @@ else:
     raw_input = input
     xrange = range

-SPARK_EC2_VERSION = "1.3.1"
+SPARK_EC2_VERSION = "1.4.0"
 SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__))

 VALID_SPARK_VERSIONS = set([
@@ -89,7 +89,7 @@ DEFAULT_SPARK_GITHUB_REPO = "https://github.com/apache/spark"

 # Default location to get the spark-ec2 scripts (and ami-list) from
 DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/mesos/spark-ec2"
-DEFAULT_SPARK_EC2_BRANCH = "branch-1.3"
+DEFAULT_SPARK_EC2_BRANCH = "branch-1.4"

 def setup_external_libs(libs):

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8350] [R] Log R unit test output to unit-tests.log
Repository: spark Updated Branches: refs/heads/master 4c5889e8f - 56d4e8a2d [SPARK-8350] [R] Log R unit test output to unit-tests.log Right now it's logged to R-unit-tests.log. Jenkins currently only archives files named unit-tests.log, and this is what all other modules (e.g. SQL, network, REPL) use. 1. We should be consistent 2. I don't want to reconfigure Jenkins to accept a different file shivaram Author: andrewor14 and...@databricks.com Author: Andrew Or and...@databricks.com Closes #6807 from andrewor14/r-logs and squashes the following commits: 96005d2 [andrewor14] Nest unit-tests.log further until R 407c46c [andrewor14] Add target to log path d7b68ae [Andrew Or] Log R unit test output to unit-tests.log Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/56d4e8a2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/56d4e8a2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/56d4e8a2 Branch: refs/heads/master Commit: 56d4e8a2d0f6aab9a599cd8733e20500ffe8fc8a Parents: 4c5889e Author: andrewor14 and...@databricks.com Authored: Mon Jun 15 08:16:22 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Mon Jun 15 08:16:22 2015 -0700 -- R/log4j.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/56d4e8a2/R/log4j.properties -- diff --git a/R/log4j.properties b/R/log4j.properties index 701adb2..cce8d91 100644 --- a/R/log4j.properties +++ b/R/log4j.properties @@ -19,7 +19,7 @@ log4j.rootCategory=INFO, file log4j.appender.file=org.apache.log4j.FileAppender log4j.appender.file.append=true -log4j.appender.file.file=R-unit-tests.log +log4j.appender.file.file=R/target/unit-tests.log log4j.appender.file.layout=org.apache.log4j.PatternLayout log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r1685612 - /spark/site/index.html
Author: shivaram Date: Mon Jun 15 16:05:17 2015 New Revision: 1685612 URL: http://svn.apache.org/r1685612 Log: Add html for R update Modified: spark/site/index.html Modified: spark/site/index.html URL: http://svn.apache.org/viewvc/spark/site/index.html?rev=1685612r1=1685611r2=1685612view=diff == --- spark/site/index.html (original) +++ spark/site/index.html Mon Jun 15 16:05:17 2015 @@ -200,13 +200,13 @@ h2Ease of Use/h2 p class=lead - Write applications quickly in Java, Scala or Python. + Write applications quickly in Java, Scala, Python, R. /p p Spark offers over 80 high-level operators that make it easy to build parallel apps. And you can use it eminteractively/em - from the Scala and Python shells. + from the Scala, Python and R shells. /p /div div class=col-md-5 col-sm-5 col-padded-top col-center - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r1685610 - /spark/index.md
Author: shivaram Date: Mon Jun 15 16:04:31 2015 New Revision: 1685610 URL: http://svn.apache.org/r1685610 Log: Add R to list of supported languages Modified: spark/index.md Modified: spark/index.md URL: http://svn.apache.org/viewvc/spark/index.md?rev=1685610r1=1685609r2=1685610view=diff == --- spark/index.md (original) +++ spark/index.md Mon Jun 15 16:04:31 2015 @@ -41,13 +41,13 @@ navigation: h2Ease of Use/h2 p class=lead - Write applications quickly in Java, Scala or Python. + Write applications quickly in Java, Scala, Python, R. /p p Spark offers over 80 high-level operators that make it easy to build parallel apps. And you can use it eminteractively/em - from the Scala and Python shells. + from the Scala, Python and R shells. /p /div div class=col-md-5 col-sm-5 col-padded-top col-center - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8322] [EC2] Added spark 1.4.0 into the VALID_SPARK_VERSIONS and…
Repository: spark Updated Branches: refs/heads/branch-1.4 8b25f62bf -> 141eab71e

[SPARK-8322] [EC2] Added spark 1.4.0 into the VALID_SPARK_VERSIONS and… … SPARK_TACHYON_MAP

Author: Mark Smith <mark.sm...@bronto.com>

Closes #6777 from markmsmith/branch-1.4 and squashes the following commits:

a218cfa [Mark Smith] [SPARK-8322][EC2] Fixed tachyon mapp entry to point to 0.6.4
90d1655 [Mark Smith] [SPARK-8322][EC2] Added spark 1.4.0 into the VALID_SPARK_VERSIONS and SPARK_TACHYON_MAP

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/141eab71
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/141eab71
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/141eab71

Branch: refs/heads/branch-1.4
Commit: 141eab71ee3aa05da899ecfc6bae40b3798a4665
Parents: 8b25f62
Author: Mark Smith <mark.sm...@bronto.com>
Authored: Fri Jun 12 10:28:30 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Fri Jun 12 10:28:30 2015 -0700

--
 ec2/spark_ec2.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/141eab71/ec2/spark_ec2.py
--
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index a765c20..5aa3e3d 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -70,7 +70,7 @@ VALID_SPARK_VERSIONS = set([
     "1.2.1",
     "1.3.0",
     "1.3.1",
-    "1.4.0"
+    "1.4.0",
 ])

 SPARK_TACHYON_MAP = {
@@ -83,6 +83,7 @@ SPARK_TACHYON_MAP = {
     "1.2.1": "0.5.0",
     "1.3.0": "0.5.0",
     "1.3.1": "0.5.0",
+    "1.4.0": "0.6.4",
 }

 DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-7227] [SPARKR] Support fillna / dropna in R DataFrame.
Repository: spark Updated Branches: refs/heads/branch-1.4 bab0fab68 -> f1d4e7e31

[SPARK-7227] [SPARKR] Support fillna / dropna in R DataFrame.

Author: Sun Rui <rui@intel.com>

Closes #6183 from sun-rui/SPARK-7227 and squashes the following commits:

dd6f5b3 [Sun Rui] Rename readEnv() back to readMap(). Add alias na.omit() for dropna().
41cf725 [Sun Rui] [SPARK-7227][SPARKR] Support fillna / dropna in R DataFrame.

(cherry picked from commit 46576ab303e50c54c3bd464f8939953efe644574)
Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f1d4e7e3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f1d4e7e3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f1d4e7e3

Branch: refs/heads/branch-1.4
Commit: f1d4e7e3111a6a44358d405389180d6cf6406223
Parents: bab0fab
Author: Sun Rui <rui@intel.com>
Authored: Sun May 31 15:01:21 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Sun May 31 15:02:16 2015 -0700

--
 R/pkg/NAMESPACE                                 |   2 +
 R/pkg/R/DataFrame.R                             | 125 +++
 R/pkg/R/generics.R                              |  18 +++
 R/pkg/R/serialize.R                             |  10 +-
 R/pkg/inst/tests/test_sparkSQL.R                | 109 
 .../scala/org/apache/spark/api/r/SerDe.scala    |   6 +-
 6 files changed, 267 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f1d4e7e3/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 411126a..f9447f6 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -19,9 +19,11 @@ exportMethods("arrange",
               "count",
               "describe",
               "distinct",
+              "dropna",
               "dtypes",
               "except",
               "explain",
+              "fillna",
               "filter",
               "first",
               "group_by",

http://git-wip-us.apache.org/repos/asf/spark/blob/f1d4e7e3/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index e79d324..0af5cb8 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1429,3 +1429,128 @@ setMethod("describe",
             sdf <- callJMethod(x@sdf, "describe", listToSeq(colList))
             dataFrame(sdf)
           })
+
+#' dropna
+#'
+#' Returns a new DataFrame omitting rows with null values.
+#'
+#' @param x A SparkSQL DataFrame.
+#' @param how "any" or "all".
+#'            if "any", drop a row if it contains any nulls.
+#'            if "all", drop a row only if all its values are null.
+#'            if minNonNulls is specified, how is ignored.
+#' @param minNonNulls If specified, drop rows that have less than
+#'                    minNonNulls non-null values.
+#'                    This overwrites the how parameter.
+#' @param cols Optional list of column names to consider.
+#' @return A DataFrame
+#'
+#' @rdname nafunctions
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlCtx <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- jsonFile(sqlCtx, path)
+#' dropna(df)
+#' }
+setMethod("dropna",
+          signature(x = "DataFrame"),
+          function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+            how <- match.arg(how)
+            if (is.null(cols)) {
+              cols <- columns(x)
+            }
+            if (is.null(minNonNulls)) {
+              minNonNulls <- if (how == "any") { length(cols) } else { 1 }
+            }
+
+            naFunctions <- callJMethod(x@sdf, "na")
+            sdf <- callJMethod(naFunctions, "drop",
+                               as.integer(minNonNulls), listToSeq(as.list(cols)))
+            dataFrame(sdf)
+          })
+
+#' @aliases dropna
+#' @export
+setMethod("na.omit",
+          signature(x = "DataFrame"),
+          function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+            dropna(x, how, minNonNulls, cols)
+          })
+
+#' fillna
+#'
+#' Replace null values.
+#'
+#' @param x A SparkSQL DataFrame.
+#' @param value Value to replace null values with.
+#'              Should be an integer, numeric, character or named list.
+#'              If the value is a named list, then cols is ignored and
+#'              value must be a mapping from column name (character) to
+#'              replacement value. The replacement value must be an
+#'              integer, numeric or character.
+#' @param cols optional list of column names to consider.
+#'             Columns specified in cols that do not have matching data
+#'             type are ignored. For example, if value is a character
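Based on the signatures documented in the diff above, a small usage sketch; the data frame, path, and column names ("age", "name") are made up for illustration.

library(SparkR)
sc <- sparkR.init()
sqlCtx <- sparkRSQL.init(sc)
df <- jsonFile(sqlCtx, "path/to/file.json")  # illustrative path

# Drop rows containing any null, or rows with fewer than 2 non-null values.
cleaned  <- dropna(df, how = "any")
cleaned2 <- dropna(df, minNonNulls = 2)

# Replace nulls with one value everywhere, or per column via a named list.
filled  <- fillna(df, 0)
filled2 <- fillna(df, list(age = 0, name = "unknown"))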
spark git commit: [SPARK-7227] [SPARKR] Support fillna / dropna in R DataFrame.
Repository: spark Updated Branches: refs/heads/master 866652c90 - 46576ab30 [SPARK-7227] [SPARKR] Support fillna / dropna in R DataFrame. Author: Sun Rui rui@intel.com Closes #6183 from sun-rui/SPARK-7227 and squashes the following commits: dd6f5b3 [Sun Rui] Rename readEnv() back to readMap(). Add alias na.omit() for dropna(). 41cf725 [Sun Rui] [SPARK-7227][SPARKR] Support fillna / dropna in R DataFrame. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/46576ab3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/46576ab3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/46576ab3 Branch: refs/heads/master Commit: 46576ab303e50c54c3bd464f8939953efe644574 Parents: 866652c Author: Sun Rui rui@intel.com Authored: Sun May 31 15:01:21 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sun May 31 15:01:59 2015 -0700 -- R/pkg/NAMESPACE | 2 + R/pkg/R/DataFrame.R | 125 +++ R/pkg/R/generics.R | 18 +++ R/pkg/R/serialize.R | 10 +- R/pkg/inst/tests/test_sparkSQL.R| 109 .../scala/org/apache/spark/api/r/SerDe.scala| 6 +- 6 files changed, 267 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/46576ab3/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 411126a..f9447f6 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -19,9 +19,11 @@ exportMethods(arrange, count, describe, distinct, + dropna, dtypes, except, explain, + fillna, filter, first, group_by, http://git-wip-us.apache.org/repos/asf/spark/blob/46576ab3/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index e79d324..0af5cb8 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1429,3 +1429,128 @@ setMethod(describe, sdf - callJMethod(x@sdf, describe, listToSeq(colList)) dataFrame(sdf) }) + +#' dropna +#' +#' Returns a new DataFrame omitting rows with null values. +#' +#' @param x A SparkSQL DataFrame. +#' @param how any or all. +#'if any, drop a row if it contains any nulls. +#'if all, drop a row only if all its values are null. +#'if minNonNulls is specified, how is ignored. +#' @param minNonNulls If specified, drop rows that have less than +#'minNonNulls non-null values. +#'This overwrites the how parameter. +#' @param cols Optional list of column names to consider. +#' @return A DataFrame +#' +#' @rdname nafunctions +#' @export +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' sqlCtx - sparkRSQL.init(sc) +#' path - path/to/file.json +#' df - jsonFile(sqlCtx, path) +#' dropna(df) +#' } +setMethod(dropna, + signature(x = DataFrame), + function(x, how = c(any, all), minNonNulls = NULL, cols = NULL) { +how - match.arg(how) +if (is.null(cols)) { + cols - columns(x) +} +if (is.null(minNonNulls)) { + minNonNulls - if (how == any) { length(cols) } else { 1 } +} + +naFunctions - callJMethod(x@sdf, na) +sdf - callJMethod(naFunctions, drop, + as.integer(minNonNulls), listToSeq(as.list(cols))) +dataFrame(sdf) + }) + +#' @aliases dropna +#' @export +setMethod(na.omit, + signature(x = DataFrame), + function(x, how = c(any, all), minNonNulls = NULL, cols = NULL) { +dropna(x, how, minNonNulls, cols) + }) + +#' fillna +#' +#' Replace null values. +#' +#' @param x A SparkSQL DataFrame. +#' @param value Value to replace null values with. +#' Should be an integer, numeric, character or named list. +#' If the value is a named list, then cols is ignored and +#' value must be a mapping from column name (character) to +#' replacement value. 
The replacement value must be an +#' integer, numeric or character. +#' @param cols optional list of column names to consider. +#' Columns specified in cols that do not have matching data +#' type are ignored. For example, if value is a character, and +#' subset contains a non-character column, then the non-character +#' column is simply ignored. +#' @return A DataFrame
spark git commit: [SPARK-8028] [SPARKR] Use addJar instead of setJars in SparkR
Repository: spark Updated Branches: refs/heads/branch-1.4 f5a9833f3 -> cbfb682ab

[SPARK-8028] [SPARKR] Use addJar instead of setJars in SparkR

This prevents the spark.jars from being cleared while using `--packages` or `--jars`

cc pwendell davies brkyvz

Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>

Closes #6568 from shivaram/SPARK-8028 and squashes the following commits:

3a9cf1f [Shivaram Venkataraman] Use addJar instead of setJars in SparkR This prevents the spark.jars from being cleared

(cherry picked from commit 6b44278ef7cd2a278dfa67e8393ef30775c72726)
Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cbfb682a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cbfb682a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cbfb682a

Branch: refs/heads/branch-1.4
Commit: cbfb682ab90d259ca716ef6987b4ca367b79eda3
Parents: f5a9833
Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Authored: Mon Jun 1 21:01:14 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Mon Jun 1 21:01:26 2015 -0700

--
 core/src/main/scala/org/apache/spark/api/r/RRDD.scala | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/cbfb682a/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
--
diff --git a/core/src/main/scala/org/apache/spark/api/r/RRDD.scala b/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
index e020458..4dfa732 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
@@ -355,7 +355,6 @@ private[r] object RRDD {

     val sparkConf = new SparkConf().setAppName(appName)
                                    .setSparkHome(sparkHome)
-                                   .setJars(jars)

     // Override `master` if we have a user-specified value
     if (master != "") {
@@ -373,7 +372,11 @@ private[r] object RRDD {
       sparkConf.setExecutorEnv(name.asInstanceOf[String], value.asInstanceOf[String])
     }

-    new JavaSparkContext(sparkConf)
+    val jsc = new JavaSparkContext(sparkConf)
+    jars.foreach { jar =>
+      jsc.addJar(jar)
+    }
+    jsc
   }

   /**

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8028] [SPARKR] Use addJar instead of setJars in SparkR
Repository: spark Updated Branches: refs/heads/master 15d7c90ae -> 6b44278ef

[SPARK-8028] [SPARKR] Use addJar instead of setJars in SparkR

This prevents the spark.jars from being cleared while using `--packages` or `--jars`

cc pwendell davies brkyvz

Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>

Closes #6568 from shivaram/SPARK-8028 and squashes the following commits:

3a9cf1f [Shivaram Venkataraman] Use addJar instead of setJars in SparkR This prevents the spark.jars from being cleared

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6b44278e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6b44278e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6b44278e

Branch: refs/heads/master
Commit: 6b44278ef7cd2a278dfa67e8393ef30775c72726
Parents: 15d7c90
Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Authored: Mon Jun 1 21:01:14 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Mon Jun 1 21:01:14 2015 -0700

--
 core/src/main/scala/org/apache/spark/api/r/RRDD.scala | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/6b44278e/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
--
diff --git a/core/src/main/scala/org/apache/spark/api/r/RRDD.scala b/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
index e020458..4dfa732 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
@@ -355,7 +355,6 @@ private[r] object RRDD {

     val sparkConf = new SparkConf().setAppName(appName)
                                    .setSparkHome(sparkHome)
-                                   .setJars(jars)

     // Override `master` if we have a user-specified value
     if (master != "") {
@@ -373,7 +372,11 @@ private[r] object RRDD {
       sparkConf.setExecutorEnv(name.asInstanceOf[String], value.asInstanceOf[String])
     }

-    new JavaSparkContext(sparkConf)
+    val jsc = new JavaSparkContext(sparkConf)
+    jars.foreach { jar =>
+      jsc.addJar(jar)
+    }
+    jsc
   }

   /**

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
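The user-visible consequence is easiest to see from R: before this change, populating spark.jars via setJars could be clobbered when --packages or --jars also contributed jars, whereas addJar is additive. A hedged sketch (jar path and package coordinates are illustrative):

library(SparkR)

# Both sources of jars now reach the JVM together, because RRDD registers each
# jar with jsc.addJar() instead of overwriting spark.jars with setJars().
sc <- sparkR.init(sparkJars = "path/to/my-extra.jar",                      # illustrative
                  sparkPackages = "com.databricks:spark-csv_2.10:1.0.3")  # illustrative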
spark git commit: [SPARK-8027] [SPARKR] Add maven profile to build R package docs
Repository: spark Updated Branches: refs/heads/master 89f642a0e -> cae9306c4

[SPARK-8027] [SPARKR] Add maven profile to build R package docs

Also use that profile in create-release.sh

cc pwendell -- Note that this means that we need `knitr` and `roxygen` installed on the machines used for building the release. Let me know if you need help with that.

Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>

Closes #6567 from shivaram/SPARK-8027 and squashes the following commits:

8dc8ecf [Shivaram Venkataraman] Add maven profile to build R package docs Also use that profile in create-release.sh

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cae9306c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cae9306c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cae9306c

Branch: refs/heads/master
Commit: cae9306c4f437c722baa57593fe83f4b7d82dbff
Parents: 89f642a
Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Authored: Mon Jun 1 21:21:45 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Mon Jun 1 21:21:45 2015 -0700

--
 core/pom.xml                         | 23 +++
 dev/create-release/create-release.sh | 16 
 2 files changed, 31 insertions(+), 8 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/cae9306c/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index 5c02be8..a021842 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -481,6 +481,29 @@
         </plugins>
       </build>
     </profile>
+    <profile>
+      <id>sparkr-docs</id>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>exec-maven-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>sparkr-pkg-docs</id>
+                <phase>compile</phase>
+                <goals>
+                  <goal>exec</goal>
+                </goals>
+              </execution>
+            </executions>
+            <configuration>
+              <executable>..${path.separator}R${path.separator}create-docs${script.extension}</executable>
+            </configuration>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
   </profiles>
 </project>

http://git-wip-us.apache.org/repos/asf/spark/blob/cae9306c/dev/create-release/create-release.sh
--
diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh
index 54274a8..0b14a61 100755
--- a/dev/create-release/create-release.sh
+++ b/dev/create-release/create-release.sh
@@ -228,14 +228,14 @@ if [[ ! "$@" =~ --skip-package ]]; then

   # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds
   # share the same Zinc server.
-  make_binary_release "hadoop1" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver" "3030" &
-  make_binary_release "hadoop1-scala2.11" "-Psparkr -Phadoop-1 -Phive -Dscala-2.11" "3031" &
-  make_binary_release "cdh4" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
-  make_binary_release "hadoop2.3" "-Psparkr -Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" &
-  make_binary_release "hadoop2.4" "-Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" &
-  make_binary_release "mapr3" "-Pmapr3 -Psparkr -Phive -Phive-thriftserver" "3035" &
-  make_binary_release "mapr4" "-Pmapr4 -Psparkr -Pyarn -Phive -Phive-thriftserver" "3036" &
-  make_binary_release "hadoop2.4-without-hive" "-Psparkr -Phadoop-2.4 -Pyarn" "3037" &
+  make_binary_release "hadoop1" "-Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive-thriftserver" "3030" &
+  make_binary_release "hadoop1-scala2.11" "-Psparkr -Psparkr-docs -Phadoop-1 -Phive -Dscala-2.11" "3031" &
+  make_binary_release "cdh4" "-Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
+  make_binary_release "hadoop2.3" "-Psparkr -Psparkr-docs -Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" &
+  make_binary_release "hadoop2.4" "-Psparkr -Psparkr-docs -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" &
+  make_binary_release "mapr3" "-Pmapr3 -Psparkr -Psparkr-docs -Phive -Phive-thriftserver" "3035" &
+  make_binary_release "mapr4" "-Pmapr4 -Psparkr -Psparkr-docs -Pyarn -Phive -Phive-thriftserver" "3036" &
+  make_binary_release "hadoop2.4-without-hive" "-Psparkr -Psparkr-docs -Phadoop-2.4 -Pyarn" "3037" &
   wait
   rm -rf spark-$RELEASE_VERSION-bin-*/

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8027] [SPARKR] Add maven profile to build R package docs
Repository: spark Updated Branches: refs/heads/branch-1.4 8ac23762e - d542a35ad [SPARK-8027] [SPARKR] Add maven profile to build R package docs Also use that profile in create-release.sh cc pwendell -- Note that this means that we need `knitr` and `roxygen` installed on the machines used for building the release. Let me know if you need help with that. Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6567 from shivaram/SPARK-8027 and squashes the following commits: 8dc8ecf [Shivaram Venkataraman] Add maven profile to build R package docs Also use that profile in create-release.sh (cherry picked from commit cae9306c4f437c722baa57593fe83f4b7d82dbff) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d542a35a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d542a35a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d542a35a Branch: refs/heads/branch-1.4 Commit: d542a35ad74a9e530d0160adf9d10ff7c3075d0d Parents: 8ac2376 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Mon Jun 1 21:21:45 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Mon Jun 1 21:21:55 2015 -0700 -- core/pom.xml | 23 +++ dev/create-release/create-release.sh | 16 2 files changed, 31 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d542a35a/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index e58efe4..1f903fc 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -475,6 +475,29 @@ /plugins /build /profile +profile + idsparkr-docs/id + build +plugins + plugin +groupIdorg.codehaus.mojo/groupId +artifactIdexec-maven-plugin/artifactId +executions + execution +idsparkr-pkg-docs/id +phasecompile/phase +goals + goalexec/goal +/goals + /execution +/executions +configuration + executable..${path.separator}R${path.separator}create-docs${script.extension}/executable +/configuration + /plugin +/plugins + /build +/profile /profiles /project http://git-wip-us.apache.org/repos/asf/spark/blob/d542a35a/dev/create-release/create-release.sh -- diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh index 54274a8..0b14a61 100755 --- a/dev/create-release/create-release.sh +++ b/dev/create-release/create-release.sh @@ -228,14 +228,14 @@ if [[ ! $@ =~ --skip-package ]]; then # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds # share the same Zinc server. 
- make_binary_release hadoop1 -Psparkr -Phadoop-1 -Phive -Phive-thriftserver 3030 - make_binary_release hadoop1-scala2.11 -Psparkr -Phadoop-1 -Phive -Dscala-2.11 3031 - make_binary_release cdh4 -Psparkr -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0 3032 - make_binary_release hadoop2.3 -Psparkr -Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn 3033 - make_binary_release hadoop2.4 -Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn 3034 - make_binary_release mapr3 -Pmapr3 -Psparkr -Phive -Phive-thriftserver 3035 - make_binary_release mapr4 -Pmapr4 -Psparkr -Pyarn -Phive -Phive-thriftserver 3036 - make_binary_release hadoop2.4-without-hive -Psparkr -Phadoop-2.4 -Pyarn 3037 + make_binary_release hadoop1 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive-thriftserver 3030 + make_binary_release hadoop1-scala2.11 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Dscala-2.11 3031 + make_binary_release cdh4 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0 3032 + make_binary_release hadoop2.3 -Psparkr -Psparkr-docs -Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn 3033 + make_binary_release hadoop2.4 -Psparkr -Psparkr-docs -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn 3034 + make_binary_release mapr3 -Pmapr3 -Psparkr -Psparkr-docs -Phive -Phive-thriftserver 3035 + make_binary_release mapr4 -Pmapr4 -Psparkr -Psparkr-docs -Pyarn -Phive -Phive-thriftserver 3036 + make_binary_release hadoop2.4-without-hive -Psparkr -Psparkr-docs -Phadoop-2.4 -Pyarn 3037 wait rm -rf spark-$RELEASE_VERSION-bin-*/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h
spark git commit: [SPARK-7714] [SPARKR] SparkR tests should use more specific expectations than expect_true
Repository: spark Updated Branches: refs/heads/master fdcad6ef4 -> 69c5dee2f

[SPARK-7714] [SPARKR] SparkR tests should use more specific expectations than expect_true

1. Update the pattern 'expect_true(a == b)' to 'expect_equal(a, b)'.
2. Update the pattern 'expect_true(inherits(a, b))' to 'expect_is(a, b)'.
3. Update the pattern 'expect_true(identical(a, b))' to 'expect_identical(a, b)'.

Author: Sun Rui <rui@intel.com>

Closes #7152 from sun-rui/SPARK-7714 and squashes the following commits:

8ad2440 [Sun Rui] Fix test case errors.
8fe9f0c [Sun Rui] Update the pattern 'expect_true(identical(a, b))' to 'expect_identical(a, b)'.
f1b8005 [Sun Rui] Update the pattern 'expect_true(inherits(a, b))' to 'expect_is(a, b)'.
f631e94 [Sun Rui] Update the pattern 'expect_true(a == b)' to 'expect_equal(a, b)'.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/69c5dee2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/69c5dee2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/69c5dee2

Branch: refs/heads/master
Commit: 69c5dee2f01b1ae35bd813d31d46429a32cb475d
Parents: fdcad6e
Author: Sun Rui <rui@intel.com>
Authored: Wed Jul 1 09:50:12 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Wed Jul 1 09:50:12 2015 -0700

--
 R/pkg/inst/tests/test_binaryFile.R          |   2 +-
 R/pkg/inst/tests/test_binary_function.R     |   4 +-
 R/pkg/inst/tests/test_includeJAR.R          |   4 +-
 R/pkg/inst/tests/test_parallelize_collect.R |   2 +-
 R/pkg/inst/tests/test_rdd.R                 |   4 +-
 R/pkg/inst/tests/test_sparkSQL.R            | 354 +++
 R/pkg/inst/tests/test_take.R                |   8 +-
 R/pkg/inst/tests/test_textFile.R            |   6 +-
 R/pkg/inst/tests/test_utils.R               |   4 +-
 9 files changed, 194 insertions(+), 194 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/69c5dee2/R/pkg/inst/tests/test_binaryFile.R
--
diff --git a/R/pkg/inst/tests/test_binaryFile.R b/R/pkg/inst/tests/test_binaryFile.R
index 4db7266..ccaea18 100644
--- a/R/pkg/inst/tests/test_binaryFile.R
+++ b/R/pkg/inst/tests/test_binaryFile.R
@@ -82,7 +82,7 @@ test_that("saveAsObjectFile()/objectFile() works with multiple paths", {
   saveAsObjectFile(rdd2, fileName2)

   rdd <- objectFile(sc, c(fileName1, fileName2))
-  expect_true(count(rdd) == 2)
+  expect_equal(count(rdd), 2)

   unlink(fileName1, recursive = TRUE)
   unlink(fileName2, recursive = TRUE)

http://git-wip-us.apache.org/repos/asf/spark/blob/69c5dee2/R/pkg/inst/tests/test_binary_function.R
--
diff --git a/R/pkg/inst/tests/test_binary_function.R b/R/pkg/inst/tests/test_binary_function.R
index a1e354e..3be8c65 100644
--- a/R/pkg/inst/tests/test_binary_function.R
+++ b/R/pkg/inst/tests/test_binary_function.R
@@ -38,13 +38,13 @@ test_that("union on two RDDs", {
   union.rdd <- unionRDD(rdd, text.rdd)
   actual <- collect(union.rdd)
   expect_equal(actual, c(as.list(nums), mockFile))
-  expect_true(getSerializedMode(union.rdd) == "byte")
+  expect_equal(getSerializedMode(union.rdd), "byte")

   rdd <- map(text.rdd, function(x) {x})
   union.rdd <- unionRDD(rdd, text.rdd)
   actual <- collect(union.rdd)
   expect_equal(actual, as.list(c(mockFile, mockFile)))
-  expect_true(getSerializedMode(union.rdd) == "byte")
+  expect_equal(getSerializedMode(union.rdd), "byte")

   unlink(fileName)
 })

http://git-wip-us.apache.org/repos/asf/spark/blob/69c5dee2/R/pkg/inst/tests/test_includeJAR.R
--
diff --git a/R/pkg/inst/tests/test_includeJAR.R b/R/pkg/inst/tests/test_includeJAR.R
index 8bc693b..844d86f 100644
--- a/R/pkg/inst/tests/test_includeJAR.R
+++ b/R/pkg/inst/tests/test_includeJAR.R
@@ -31,7 +31,7 @@
 test_that("sparkJars tag in SparkContext", {
   testOutput <- runScript()
   helloTest <- testOutput[1]
-  expect_true(helloTest == "Hello, Dave")
+  expect_equal(helloTest, "Hello, Dave")
   basicFunction <- testOutput[2]
-  expect_true(basicFunction == 4L)
+  expect_equal(basicFunction, 4)
 })

http://git-wip-us.apache.org/repos/asf/spark/blob/69c5dee2/R/pkg/inst/tests/test_parallelize_collect.R
--
diff --git a/R/pkg/inst/tests/test_parallelize_collect.R b/R/pkg/inst/tests/test_parallelize_collect.R
index fff0286..2552127 100644
--- a/R/pkg/inst/tests/test_parallelize_collect.R
+++ b/R/pkg/inst/tests/test_parallelize_collect.R
@@ -57,7 +57,7 @@ test_that("parallelize() on simple vectors and lists returns an RDD", {
                strListRDD2)

   for (rdd in rdds) {
-    expect_true(inherits(rdd, "RDD
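The point of the rewrite is testthat's failure reporting: expect_true can only say the condition "isn't true", while the more specific expectations print the actual and expected values. A minimal standalone sketch (plain testthat, no Spark required):

library(testthat)

x <- 4L

expect_equal(x, 4)         # replaces expect_true(x == 4); reports actual vs expected on failure
expect_is(x, "integer")    # replaces expect_true(inherits(x, "integer"))
expect_identical(x, 4L)    # replaces expect_true(identical(x, 4L))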
spark git commit: [SPARK-8596] [EC2] Added port for Rstudio
Repository: spark Updated Branches: refs/heads/master ec7843819 - 9ce78b434 [SPARK-8596] [EC2] Added port for Rstudio This would otherwise need to be set manually by R users in AWS. https://issues.apache.org/jira/browse/SPARK-8596 Author: Vincent D. Warmerdam vincentwarmer...@gmail.com Author: vincent vincentwarmer...@gmail.com Closes #7068 from koaning/rstudio-port-number and squashes the following commits: ac8100d [vincent] Update spark_ec2.py ce6ad88 [Vincent D. Warmerdam] added port number for rstudio Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9ce78b43 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9ce78b43 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9ce78b43 Branch: refs/heads/master Commit: 9ce78b4343febe87c4edd650c698cc20d38f615d Parents: ec78438 Author: Vincent D. Warmerdam vincentwarmer...@gmail.com Authored: Sun Jun 28 13:33:33 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sun Jun 28 13:33:33 2015 -0700 -- ec2/spark_ec2.py | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9ce78b43/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index e4932cf..18ccbc0 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -505,6 +505,8 @@ def launch_cluster(conn, opts, cluster_name): master_group.authorize('tcp', 50070, 50070, authorized_address) master_group.authorize('tcp', 60070, 60070, authorized_address) master_group.authorize('tcp', 4040, 4045, authorized_address) +# Rstudio (GUI for R) needs port 8787 for web access +master_group.authorize('tcp', 8787, 8787, authorized_address) # HDFS NFS gateway requires 111,2049,4242 for tcp udp master_group.authorize('tcp', 111, 111, authorized_address) master_group.authorize('udp', 111, 111, authorized_address) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8549] [SPARKR] Fix the line length of SparkR
Repository: spark Updated Branches: refs/heads/master f9c448dce - a0cb111b2 [SPARK-8549] [SPARKR] Fix the line length of SparkR [[SPARK-8549] Fix the line length of SparkR - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-8549) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #7204 from yu-iskw/SPARK-8549 and squashes the following commits: 6fb131a [Yu ISHIKAWA] Fix the typo 1737598 [Yu ISHIKAWA] [SPARK-8549][SparkR] Fix the line length of SparkR Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a0cb111b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a0cb111b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a0cb111b Branch: refs/heads/master Commit: a0cb111b22cb093e86b0daeecb3dcc41d095df40 Parents: f9c448d Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Sun Jul 5 20:50:02 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sun Jul 5 20:50:02 2015 -0700 -- R/pkg/R/generics.R | 3 ++- R/pkg/R/pairRDD.R | 12 ++-- R/pkg/R/sparkR.R | 9 ++--- R/pkg/R/utils.R| 31 ++- R/pkg/inst/tests/test_includeJAR.R | 4 ++-- R/pkg/inst/tests/test_rdd.R| 12 R/pkg/inst/tests/test_sparkSQL.R | 11 +-- 7 files changed, 51 insertions(+), 31 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a0cb111b/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 79055b7..fad9d71 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -20,7 +20,8 @@ # @rdname aggregateRDD # @seealso reduce # @export -setGeneric(aggregateRDD, function(x, zeroValue, seqOp, combOp) { standardGeneric(aggregateRDD) }) +setGeneric(aggregateRDD, + function(x, zeroValue, seqOp, combOp) { standardGeneric(aggregateRDD) }) # @rdname cache-methods # @export http://git-wip-us.apache.org/repos/asf/spark/blob/a0cb111b/R/pkg/R/pairRDD.R -- diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R index 7f902ba..0f1179e 100644 --- a/R/pkg/R/pairRDD.R +++ b/R/pkg/R/pairRDD.R @@ -560,8 +560,8 @@ setMethod(join, # Left outer join two RDDs # # @description -# \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of the form list(K, V). -# The key types of the two RDDs should be the same. +# \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of +# the form list(K, V). The key types of the two RDDs should be the same. # # @param x An RDD to be joined. Should be an RDD where each element is # list(K, V). @@ -597,8 +597,8 @@ setMethod(leftOuterJoin, # Right outer join two RDDs # # @description -# \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of the form list(K, V). -# The key types of the two RDDs should be the same. +# \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of +# the form list(K, V). The key types of the two RDDs should be the same. # # @param x An RDD to be joined. Should be an RDD where each element is # list(K, V). @@ -634,8 +634,8 @@ setMethod(rightOuterJoin, # Full outer join two RDDs # # @description -# \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of the form list(K, V). -# The key types of the two RDDs should be the same. +# \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of +# the form list(K, V). The key types of the two RDDs should be the same. # # @param x An RDD to be joined. Should be an RDD where each element is # list(K, V). 
http://git-wip-us.apache.org/repos/asf/spark/blob/a0cb111b/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 86233e0..048eb8e 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -105,7 +105,8 @@ sparkR.init <- function( sparkPackages = "") { if (exists(".sparkRjsc", envir = .sparkREnv)) { -cat("Re-using existing Spark Context. Please stop SparkR with sparkR.stop() or restart R to create a new Spark Context\n") +cat(paste("Re-using existing Spark Context.", + "Please stop SparkR with sparkR.stop() or restart R to create a new Spark Context\n")) return(get(".sparkRjsc", envir = .sparkREnv)) } @@ -180,14 +181,16 @@ sparkR.init <- function( sparkExecutorEnvMap <- new.env() if (!any(names(sparkExecutorEnv) == "LD_LIBRARY_PATH")) { -sparkExecutorEnvMap[["LD_LIBRARY_PATH"]] <- paste0("$LD_LIBRARY_PATH:", Sys.getenv("LD_LIBRARY_PATH
spark git commit: [HOTFIX] Copy SparkR lib if it exists in make-distribution
Repository: spark Updated Branches: refs/heads/branch-1.4 8d6d8a538 - fbc4480d9 [HOTFIX] Copy SparkR lib if it exists in make-distribution This is to fix an issue reported in #6373 where the `cp` would fail if `-Psparkr` was not used in the build cc dragos pwendell Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6379 from shivaram/make-distribution-hotfix and squashes the following commits: 08eb7e4 [Shivaram Venkataraman] Copy SparkR lib if it exists in make-distribution (cherry picked from commit b231baa24857ea83c8062dd4e033db4e35bf457d) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fbc4480d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fbc4480d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fbc4480d Branch: refs/heads/branch-1.4 Commit: fbc4480d9359a10609b79d429a15a244eff5f65f Parents: 8d6d8a5 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Sat May 23 12:28:16 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sat May 23 12:28:24 2015 -0700 -- make-distribution.sh | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fbc4480d/make-distribution.sh -- diff --git a/make-distribution.sh b/make-distribution.sh index 7882734..a2b0c43 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -229,10 +229,13 @@ cp $SPARK_HOME/conf/*.template $DISTDIR/conf cp $SPARK_HOME/README.md $DISTDIR cp -r $SPARK_HOME/bin $DISTDIR cp -r $SPARK_HOME/python $DISTDIR -mkdir -p $DISTDIR/R/lib -cp -r $SPARK_HOME/R/lib/SparkR $DISTDIR/R/lib cp -r $SPARK_HOME/sbin $DISTDIR cp -r $SPARK_HOME/ec2 $DISTDIR +# Copy SparkR if it exists +if [ -d $SPARK_HOME/R/lib/SparkR ]; then + mkdir -p $DISTDIR/R/lib + cp -r $SPARK_HOME/R/lib/SparkR $DISTDIR/R/lib +fi # Download and copy in tachyon, if requested if [ $SPARK_TACHYON == true ]; then - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [HOTFIX] Copy SparkR lib if it exists in make-distribution
Repository: spark Updated Branches: refs/heads/master 2b7e63585 - b231baa24 [HOTFIX] Copy SparkR lib if it exists in make-distribution This is to fix an issue reported in #6373 where the `cp` would fail if `-Psparkr` was not used in the build cc dragos pwendell Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6379 from shivaram/make-distribution-hotfix and squashes the following commits: 08eb7e4 [Shivaram Venkataraman] Copy SparkR lib if it exists in make-distribution Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b231baa2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b231baa2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b231baa2 Branch: refs/heads/master Commit: b231baa24857ea83c8062dd4e033db4e35bf457d Parents: 2b7e635 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Sat May 23 12:28:16 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sat May 23 12:28:16 2015 -0700 -- make-distribution.sh | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b231baa2/make-distribution.sh -- diff --git a/make-distribution.sh b/make-distribution.sh index 7882734..a2b0c43 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -229,10 +229,13 @@ cp $SPARK_HOME/conf/*.template $DISTDIR/conf cp $SPARK_HOME/README.md $DISTDIR cp -r $SPARK_HOME/bin $DISTDIR cp -r $SPARK_HOME/python $DISTDIR -mkdir -p $DISTDIR/R/lib -cp -r $SPARK_HOME/R/lib/SparkR $DISTDIR/R/lib cp -r $SPARK_HOME/sbin $DISTDIR cp -r $SPARK_HOME/ec2 $DISTDIR +# Copy SparkR if it exists +if [ -d $SPARK_HOME/R/lib/SparkR ]; then + mkdir -p $DISTDIR/R/lib + cp -r $SPARK_HOME/R/lib/SparkR $DISTDIR/R/lib +fi # Download and copy in tachyon, if requested if [ $SPARK_TACHYON == true ]; then - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-6811] Copy SparkR lib in make-distribution.sh
Repository: spark Updated Branches: refs/heads/master 7af3818c6 -> a40bca011 [SPARK-6811] Copy SparkR lib in make-distribution.sh This change also removes native libraries from SparkR to make sure our distribution works across platforms Tested by building on Mac, running on Amazon Linux (CentOS), Windows VM and vice-versa (built on Linux, run on Mac) I will also test this with YARN soon and update this PR. Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6373 from shivaram/sparkr-binary and squashes the following commits: ae41b5c [Shivaram Venkataraman] Remove native libraries from SparkR Also include the built SparkR package in make-distribution.sh Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a40bca01 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a40bca01 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a40bca01 Branch: refs/heads/master Commit: a40bca0111de45763c3ef4270afb2185c16b8f95 Parents: 7af3818 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Sat May 23 00:04:01 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sat May 23 00:04:01 2015 -0700 -- R/pkg/NAMESPACE | 5 +++- R/pkg/R/utils.R | 38 - R/pkg/src-native/Makefile | 27 ++ R/pkg/src-native/Makefile.win | 27 ++ R/pkg/src-native/string_hash_code.c | 49 R/pkg/src/Makefile | 27 -- R/pkg/src/Makefile.win | 27 -- R/pkg/src/string_hash_code.c| 49 make-distribution.sh| 2 ++ 9 files changed, 146 insertions(+), 105 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 64ffdcf..411126a 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -1,6 +1,9 @@ # Imports from base R importFrom(methods, setGeneric, setMethod, setOldClass) -useDynLib(SparkR, stringHashCode) + +# Disable native libraries till we figure out how to package it +# See SPARKR-7839 +#useDynLib(SparkR, stringHashCode) # S3 methods exported export(sparkR.init) http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 0e7b7bd..69b2700 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -122,13 +122,49 @@ hashCode <- function(key) { intBits <- packBits(rawToBits(rawVec), "integer") as.integer(bitwXor(intBits[2], intBits[1])) } else if (class(key) == "character") { -.Call("stringHashCode", key) +# TODO: SPARK-7839 means we might not have the native library available +if (is.loaded("stringHashCode")) { + .Call("stringHashCode", key) +} else { + n <- nchar(key) + if (n == 0) { +0L + } else { +asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) }) +hashC <- 0 +for (k in 1:length(asciiVals)) { + hashC <- mult31AndAdd(hashC, asciiVals[k]) +} +as.integer(hashC) + } +} } else { warning(paste("Could not hash object, returning 0", sep = "")) as.integer(0) } } +# Helper function used to wrap a 'numeric' value to integer bounds. +# Useful for implementing C-like integer arithmetic +wrapInt <- function(value) { + if (value > .Machine$integer.max) { +value <- value - 2 * .Machine$integer.max - 2 + } else if (value < -1 * .Machine$integer.max) { +value <- 2 * .Machine$integer.max + value + 2 + } + value +} + +# Multiply `val` by 31 and add `addVal` to the result. Ensures that +# integer-overflows are handled at every step.
+mult31AndAdd <- function(val, addVal) { + vec <- c(bitwShiftL(val, c(4,3,2,1,0)), addVal) + Reduce(function(a, b) { + wrapInt(as.numeric(a) + as.numeric(b)) + }, + vec) +} + # Create a new RDD with serializedMode == "byte". # Return itself if already in "byte" format. serializeToBytes <- function(rdd) { http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src-native/Makefile -- diff --git a/R/pkg/src-native/Makefile b/R/pkg/src-native/Makefile new file mode 100644 index 000..a55a56f --- /dev/null +++ b/R/pkg/src-native/Makefile @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses
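The pure-R fallback above reproduces the Java-style string hash (h = 31 * h + ascii(c)) with explicit 32-bit overflow wrapping, since R's doubles would otherwise silently exceed integer bounds. A minimal standalone sketch of the same scheme; `javaStringHash` is an illustrative name, not part of the patch:

```r
# Wrap a 'numeric' back into signed 32-bit integer range, as in the patch.
wrapInt <- function(value) {
  if (value > .Machine$integer.max) {
    value <- value - 2 * .Machine$integer.max - 2
  } else if (value < -1 * .Machine$integer.max) {
    value <- 2 * .Machine$integer.max + value + 2
  }
  value
}

javaStringHash <- function(key) {
  # charToRaw gives hex bytes; strtoi(x, 16L) turns each into its ascii code
  asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) })
  hashC <- 0
  for (k in seq_along(asciiVals)) {
    # 31 * h is built as (16 + 8 + 4 + 2 + 1) * h via shifts, wrapping each sum
    hashC <- Reduce(function(a, b) wrapInt(as.numeric(a) + as.numeric(b)),
                    c(bitwShiftL(hashC, c(4, 3, 2, 1, 0)), asciiVals[k]))
  }
  as.integer(hashC)
}

javaStringHash("abc")  # 96354, the same value java.lang.String#hashCode gives for "abc"
```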
spark git commit: [SPARK-6811] Copy SparkR lib in make-distribution.sh
Repository: spark Updated Branches: refs/heads/branch-1.4 c636b87dc -> c8eb76ba6 [SPARK-6811] Copy SparkR lib in make-distribution.sh This change also removes native libraries from SparkR to make sure our distribution works across platforms Tested by building on Mac, running on Amazon Linux (CentOS), Windows VM and vice-versa (built on Linux, run on Mac) I will also test this with YARN soon and update this PR. Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6373 from shivaram/sparkr-binary and squashes the following commits: ae41b5c [Shivaram Venkataraman] Remove native libraries from SparkR Also include the built SparkR package in make-distribution.sh (cherry picked from commit a40bca0111de45763c3ef4270afb2185c16b8f95) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8eb76ba Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8eb76ba Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8eb76ba Branch: refs/heads/branch-1.4 Commit: c8eb76ba673026f2fb2b22e8b3e8102a5940297c Parents: c636b87 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Sat May 23 00:04:01 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sat May 23 00:04:32 2015 -0700 -- R/pkg/NAMESPACE | 5 +++- R/pkg/R/utils.R | 38 - R/pkg/src-native/Makefile | 27 ++ R/pkg/src-native/Makefile.win | 27 ++ R/pkg/src-native/string_hash_code.c | 49 R/pkg/src/Makefile | 27 -- R/pkg/src/Makefile.win | 27 -- R/pkg/src/string_hash_code.c| 49 make-distribution.sh| 2 ++ 9 files changed, 146 insertions(+), 105 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c8eb76ba/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 64ffdcf..411126a 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -1,6 +1,9 @@ # Imports from base R importFrom(methods, setGeneric, setMethod, setOldClass) -useDynLib(SparkR, stringHashCode) + +# Disable native libraries till we figure out how to package it +# See SPARKR-7839 +#useDynLib(SparkR, stringHashCode) # S3 methods exported export(sparkR.init) http://git-wip-us.apache.org/repos/asf/spark/blob/c8eb76ba/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 0e7b7bd..69b2700 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -122,13 +122,49 @@ hashCode <- function(key) { intBits <- packBits(rawToBits(rawVec), "integer") as.integer(bitwXor(intBits[2], intBits[1])) } else if (class(key) == "character") { -.Call("stringHashCode", key) +# TODO: SPARK-7839 means we might not have the native library available +if (is.loaded("stringHashCode")) { + .Call("stringHashCode", key) +} else { + n <- nchar(key) + if (n == 0) { +0L + } else { +asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) }) +hashC <- 0 +for (k in 1:length(asciiVals)) { + hashC <- mult31AndAdd(hashC, asciiVals[k]) +} +as.integer(hashC) + } +} } else { warning(paste("Could not hash object, returning 0", sep = "")) as.integer(0) } } +# Helper function used to wrap a 'numeric' value to integer bounds. +# Useful for implementing C-like integer arithmetic +wrapInt <- function(value) { + if (value > .Machine$integer.max) { +value <- value - 2 * .Machine$integer.max - 2 + } else if (value < -1 * .Machine$integer.max) { +value <- 2 * .Machine$integer.max + value + 2 + } + value +} + +# Multiply `val` by 31 and add `addVal` to the result. Ensures that +# integer-overflows are handled at every step.
+mult31AndAdd <- function(val, addVal) { + vec <- c(bitwShiftL(val, c(4,3,2,1,0)), addVal) + Reduce(function(a, b) { + wrapInt(as.numeric(a) + as.numeric(b)) + }, + vec) +} + # Create a new RDD with serializedMode == "byte". # Return itself if already in "byte" format. serializeToBytes <- function(rdd) { http://git-wip-us.apache.org/repos/asf/spark/blob/c8eb76ba/R/pkg/src-native/Makefile -- diff --git a/R/pkg/src-native/Makefile b/R/pkg/src-native/Makefile new file mode 100644 index 000..a55a56f --- /dev/null +++ b/R/pkg/src-native/Makefile @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license
spark git commit: [SPARK-6806] [SPARKR] [DOCS] Fill in SparkR examples in programming guide
Repository: spark Updated Branches: refs/heads/branch-1.4 b928db4fe -> c636b87dc [SPARK-6806] [SPARKR] [DOCS] Fill in SparkR examples in programming guide sqlCtx -> sqlContext You can check the docs by: ``` $ cd docs $ SKIP_SCALADOC=1 jekyll serve ``` cc shivaram Author: Davies Liu dav...@databricks.com Closes #5442 from davies/r_docs and squashes the following commits: 7a12ec6 [Davies Liu] remove rdd in R docs 8496b26 [Davies Liu] remove the docs related to RDD e23b9d6 [Davies Liu] delete R docs for RDD API 222e4ff [Davies Liu] Merge branch 'master' into r_docs 89684ce [Davies Liu] Merge branch 'r_docs' of github.com:davies/spark into r_docs f0a10e1 [Davies Liu] address comments from @shivaram f61de71 [Davies Liu] Update pairRDD.R 3ef7cf3 [Davies Liu] use "+" instead of "function(a,b) a+b" 2f10a77 [Davies Liu] address comments from @cafreeman 9c2a062 [Davies Liu] mention R api together with Python API 23f751a [Davies Liu] Fill in SparkR examples in programming guide (cherry picked from commit 7af3818c6b2bf35bfa531ab7cc3a4a714385015e) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c636b87d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c636b87d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c636b87d Branch: refs/heads/branch-1.4 Commit: c636b87dc287ce99a887bc59cad31aaf48477a56 Parents: b928db4 Author: Davies Liu dav...@databricks.com Authored: Sat May 23 00:00:30 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sat May 23 00:02:22 2015 -0700 -- R/README.md | 4 +- R/pkg/R/DataFrame.R | 176 R/pkg/R/RDD.R| 2 +- R/pkg/R/SQLContext.R | 165 --- R/pkg/R/pairRDD.R| 4 +- R/pkg/R/sparkR.R | 10 +- R/pkg/inst/profile/shell.R | 6 +- R/pkg/inst/tests/test_sparkSQL.R | 156 +++--- docs/_plugins/copy_api_dirs.rb | 68 --- docs/api.md | 3 +- docs/index.md| 23 ++- docs/programming-guide.md| 21 +- docs/quick-start.md | 18 +- docs/sql-programming-guide.md| 373 +- 14 files changed, 706 insertions(+), 323 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c636b87d/R/README.md -- diff --git a/R/README.md b/R/README.md index a6970e3..d7d65b4 100644 --- a/R/README.md +++ b/R/README.md @@ -52,7 +52,7 @@ The SparkR documentation (Rd files and HTML files) are not a part of the source SparkR comes with several sample programs in the `examples/src/main/r` directory. To run one of them, use `./bin/sparkR <filename> <args>`. For example: -./bin/sparkR examples/src/main/r/pi.R local[2] +./bin/sparkR examples/src/main/r/dataframe.R You can also run the unit-tests for SparkR by running (you need to install the [testthat](http://cran.r-project.org/web/packages/testthat/index.html) package first): @@ -63,5 +63,5 @@ You can also run the unit-tests for SparkR by running (you need to install the [ The `./bin/spark-submit` and `./bin/sparkR` can also be used to submit jobs to YARN clusters. You will need to set YARN conf dir before doing so.
For example on CDH you can run ``` export YARN_CONF_DIR=/etc/hadoop/conf -./bin/spark-submit --master yarn examples/src/main/r/pi.R 4 +./bin/spark-submit --master yarn examples/src/main/r/dataframe.R ``` http://git-wip-us.apache.org/repos/asf/spark/blob/c636b87d/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index a7fa32e..ed8093c 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -65,9 +65,9 @@ dataFrame <- function(sdf, isCached = FALSE) { #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' printSchema(df) #'} setMethod("printSchema", @@ -88,9 +88,9 @@ setMethod("printSchema", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' dfSchema <- schema(df) #'} setMethod("schema", @@ -110,9 +110,9 @@ setMethod("schema", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' explain(df, TRUE) #'} setMethod("explain
spark git commit: [SPARK-6806] [SPARKR] [DOCS] Fill in SparkR examples in programming guide
Repository: spark Updated Branches: refs/heads/master 4583cf4be -> 7af3818c6 [SPARK-6806] [SPARKR] [DOCS] Fill in SparkR examples in programming guide sqlCtx -> sqlContext You can check the docs by: ``` $ cd docs $ SKIP_SCALADOC=1 jekyll serve ``` cc shivaram Author: Davies Liu dav...@databricks.com Closes #5442 from davies/r_docs and squashes the following commits: 7a12ec6 [Davies Liu] remove rdd in R docs 8496b26 [Davies Liu] remove the docs related to RDD e23b9d6 [Davies Liu] delete R docs for RDD API 222e4ff [Davies Liu] Merge branch 'master' into r_docs 89684ce [Davies Liu] Merge branch 'r_docs' of github.com:davies/spark into r_docs f0a10e1 [Davies Liu] address comments from @shivaram f61de71 [Davies Liu] Update pairRDD.R 3ef7cf3 [Davies Liu] use "+" instead of "function(a,b) a+b" 2f10a77 [Davies Liu] address comments from @cafreeman 9c2a062 [Davies Liu] mention R api together with Python API 23f751a [Davies Liu] Fill in SparkR examples in programming guide Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7af3818c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7af3818c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7af3818c Branch: refs/heads/master Commit: 7af3818c6b2bf35bfa531ab7cc3a4a714385015e Parents: 4583cf4 Author: Davies Liu dav...@databricks.com Authored: Sat May 23 00:00:30 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sat May 23 00:01:40 2015 -0700 -- R/README.md | 4 +- R/pkg/R/DataFrame.R | 176 R/pkg/R/RDD.R| 2 +- R/pkg/R/SQLContext.R | 165 --- R/pkg/R/pairRDD.R| 4 +- R/pkg/R/sparkR.R | 10 +- R/pkg/inst/profile/shell.R | 6 +- R/pkg/inst/tests/test_sparkSQL.R | 156 +++--- docs/_plugins/copy_api_dirs.rb | 68 --- docs/api.md | 3 +- docs/index.md| 23 ++- docs/programming-guide.md| 21 +- docs/quick-start.md | 18 +- docs/sql-programming-guide.md| 373 +- 14 files changed, 706 insertions(+), 323 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7af3818c/R/README.md -- diff --git a/R/README.md b/R/README.md index a6970e3..d7d65b4 100644 --- a/R/README.md +++ b/R/README.md @@ -52,7 +52,7 @@ The SparkR documentation (Rd files and HTML files) are not a part of the source SparkR comes with several sample programs in the `examples/src/main/r` directory. To run one of them, use `./bin/sparkR <filename> <args>`. For example: -./bin/sparkR examples/src/main/r/pi.R local[2] +./bin/sparkR examples/src/main/r/dataframe.R You can also run the unit-tests for SparkR by running (you need to install the [testthat](http://cran.r-project.org/web/packages/testthat/index.html) package first): @@ -63,5 +63,5 @@ You can also run the unit-tests for SparkR by running (you need to install the [ The `./bin/spark-submit` and `./bin/sparkR` can also be used to submit jobs to YARN clusters. You will need to set YARN conf dir before doing so.
For example on CDH you can run ``` export YARN_CONF_DIR=/etc/hadoop/conf -./bin/spark-submit --master yarn examples/src/main/r/pi.R 4 +./bin/spark-submit --master yarn examples/src/main/r/dataframe.R ``` http://git-wip-us.apache.org/repos/asf/spark/blob/7af3818c/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index a7fa32e..ed8093c 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -65,9 +65,9 @@ dataFrame <- function(sdf, isCached = FALSE) { #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' printSchema(df) #'} setMethod("printSchema", @@ -88,9 +88,9 @@ setMethod("printSchema", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' dfSchema <- schema(df) #'} setMethod("schema", @@ -110,9 +110,9 @@ setMethod("schema", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' explain(df, TRUE) #'} setMethod("explain", @@ -139,9 +139,9 @@ setMethod("explain", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext
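The updated docs standardize every example on the same bootstrap. A minimal sketch of that pattern against the 1.4-era SparkR API (the JSON path is a placeholder, not a file shipped with the docs):

```r
library(SparkR)

sc <- sparkR.init(master = "local[2]")  # create a SparkContext
sqlContext <- sparkRSQL.init(sc)        # the renamed handle (was sqlCtx)

# Any newline-delimited JSON file works here
df <- jsonFile(sqlContext, "path/to/file.json")
printSchema(df)
head(df)

sparkR.stop()
```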
spark git commit: [SPARK-8821] [EC2] Switched to binary mode for file reading
Repository: spark Updated Branches: refs/heads/master 738c10748 -> 70beb808e [SPARK-8821] [EC2] Switched to binary mode for file reading Otherwise the script will crash with - Downloading boto... Traceback (most recent call last): File "ec2/spark_ec2.py", line 148, in <module> setup_external_libs(external_libs) File "ec2/spark_ec2.py", line 128, in setup_external_libs if hashlib.md5(tar.read()).hexdigest() != lib["md5"]: File "/usr/lib/python3.4/codecs.py", line 319, in decode (result, consumed) = self._buffer_decode(data, self.errors, final) UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte in case of a UTF-8 env setting. Author: Simon Hafner hafnersi...@gmail.com Closes #7215 from reactormonk/branch-1.4 and squashes the following commits: e86957a [Simon Hafner] [SPARK-8821] [EC2] Switched to binary mode (cherry picked from commit 83a621a5a8f8a2991c4cfa687279589e5c623d46) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/70beb808 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/70beb808 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/70beb808 Branch: refs/heads/master Commit: 70beb808e13f6371968ac87f7cf625ed110375e6 Parents: 738c107 Author: Simon Hafner hafnersi...@gmail.com Authored: Tue Jul 7 09:42:59 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Jul 7 09:43:16 2015 -0700 -- ec2/spark_ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/70beb808/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 18ccbc0..8582d43 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -125,7 +125,7 @@ def setup_external_libs(libs): ) with open(tgz_file_path, "wb") as tgz_file: tgz_file.write(download_stream.read()) -with open(tgz_file_path) as tar: +with open(tgz_file_path, "rb") as tar: if hashlib.md5(tar.read()).hexdigest() != lib["md5"]: print("ERROR: Got wrong md5sum for {lib}.".format(lib=lib["name"]), file=stderr) sys.exit(1) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8821] [EC2] Switched to binary mode for file reading
Repository: spark Updated Branches: refs/heads/branch-1.4 bf8b47d17 -> 83a621a5a [SPARK-8821] [EC2] Switched to binary mode for file reading Otherwise the script will crash with - Downloading boto... Traceback (most recent call last): File "ec2/spark_ec2.py", line 148, in <module> setup_external_libs(external_libs) File "ec2/spark_ec2.py", line 128, in setup_external_libs if hashlib.md5(tar.read()).hexdigest() != lib["md5"]: File "/usr/lib/python3.4/codecs.py", line 319, in decode (result, consumed) = self._buffer_decode(data, self.errors, final) UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte in case of a UTF-8 env setting. Author: Simon Hafner hafnersi...@gmail.com Closes #7215 from reactormonk/branch-1.4 and squashes the following commits: e86957a [Simon Hafner] [SPARK-8821] [EC2] Switched to binary mode Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/83a621a5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/83a621a5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/83a621a5 Branch: refs/heads/branch-1.4 Commit: 83a621a5a8f8a2991c4cfa687279589e5c623d46 Parents: bf8b47d Author: Simon Hafner hafnersi...@gmail.com Authored: Tue Jul 7 09:42:59 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Jul 7 09:42:59 2015 -0700 -- ec2/spark_ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/83a621a5/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 05fa47f..91f0a24 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -127,7 +127,7 @@ def setup_external_libs(libs): ) with open(tgz_file_path, "wb") as tgz_file: tgz_file.write(download_stream.read()) -with open(tgz_file_path) as tar: +with open(tgz_file_path, "rb") as tar: if hashlib.md5(tar.read()).hexdigest() != lib["md5"]: print("ERROR: Got wrong md5sum for {lib}.".format(lib=lib["name"]), file=stderr) sys.exit(1) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9318] [SPARK-9320] [SPARKR] Aliases for merge and summary functions on DataFrames
Repository: spark Updated Branches: refs/heads/master 8cb415a4b -> 712f5b7a9 [SPARK-9318] [SPARK-9320] [SPARKR] Aliases for merge and summary functions on DataFrames This PR adds synonyms for ```merge``` and ```summary``` in SparkR DataFrame API. cc shivaram Author: Hossein hoss...@databricks.com Closes #7806 from falaki/SPARK-9320 and squashes the following commits: 72600f7 [Hossein] Updated docs 92a6e75 [Hossein] Fixed merge generic signature issue 4c2b051 [Hossein] Fixing naming with mllib summary 0f3a64c [Hossein] Added ... to generic for merge 30fbaf8 [Hossein] Merged master ae1a4cf [Hossein] Merge branch 'master' into SPARK-9320 e8eb86f [Hossein] Add a generic for merge fc01f2d [Hossein] Added unit test 8d92012 [Hossein] Added merge as an alias for join 5b8bedc [Hossein] Added unit test 632693d [Hossein] Added summary as an alias for describe for DataFrame Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/712f5b7a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/712f5b7a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/712f5b7a Branch: refs/heads/master Commit: 712f5b7a9ab52c26e3d086629633950ec2fb7afc Parents: 8cb415a Author: Hossein hoss...@databricks.com Authored: Fri Jul 31 19:24:00 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jul 31 19:24:44 2015 -0700 -- R/pkg/NAMESPACE | 2 ++ R/pkg/R/DataFrame.R | 22 ++ R/pkg/R/generics.R | 8 R/pkg/R/mllib.R | 8 R/pkg/inst/tests/test_sparkSQL.R | 14 -- 5 files changed, 48 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/712f5b7a/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ff116cb..b2d92bd 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -46,6 +46,7 @@ exportMethods("arrange", "isLocal", "join", + "merge", "names", "ncol", "nrow", @@ -69,6 +70,7 @@ exportMethods("arrange", "show", "showDF", "summarize", + "summary", "take", "unionAll", "unique", http://git-wip-us.apache.org/repos/asf/spark/blob/712f5b7a/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index b4065d2..8956032 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1279,6 +1279,15 @@ setMethod("join", dataFrame(sdf) }) +#' rdname merge +#' aliases join +setMethod("merge", + signature(x = "DataFrame", y = "DataFrame"), + function(x, y, joinExpr = NULL, joinType = NULL, ...) { +join(x, y, joinExpr, joinType) + }) + + #' UnionAll #' #' Return a new DataFrame containing the union of rows in this DataFrame @@ -1524,6 +1533,19 @@ setMethod("describe", dataFrame(sdf) }) +#' @title Summary +#' +#' @description Computes statistics for numeric columns of the DataFrame +#' +#' @rdname summary +#' @aliases describe +setMethod("summary", + signature(x = "DataFrame"), + function(x) { +describe(x) + }) + + #' dropna #' #' Returns a new DataFrame omitting rows with null values. http://git-wip-us.apache.org/repos/asf/spark/blob/712f5b7a/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 71d1e34..c43b947 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -461,6 +461,10 @@ setGeneric("isLocal", function(x) { standardGeneric("isLocal") }) #' @export setGeneric("limit", function(x, num) {standardGeneric("limit") }) +#' rdname merge +#' @export +setGeneric("merge") + #' @rdname withColumn #' @export setGeneric("mutate", function(x, ...) {standardGeneric("mutate") }) @@ -531,6 +535,10 @@ setGeneric("showDF", function(x,...)
{ standardGeneric("showDF") }) #' @export setGeneric("summarize", function(x,...) { standardGeneric("summarize") }) +##' rdname summary +##' @export +setGeneric("summary", function(x, ...) { standardGeneric("summary") }) + # @rdname tojson # @export setGeneric("toJSON", function(x) { standardGeneric("toJSON") }) http://git-wip-us.apache.org/repos/asf/spark/blob/712f5b7a/R/pkg/R/mllib.R -- diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index efddcc1..b524d1f 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -86,12 +86,12 @@ setMethod("predict", signature(object = "PipelineModel"), #' model <- glm(y ~ x, trainingData
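In use, both new aliases simply delegate to the existing verbs. A hedged sketch, assuming an initialized `sqlContext`; the column names are invented for illustration:

```r
df  <- createDataFrame(sqlContext, data.frame(name = c("a", "b"), age = c(1, 2),
                                              stringsAsFactors = FALSE))
df2 <- createDataFrame(sqlContext, data.frame(name = c("a", "c"), height = c(10, 20),
                                              stringsAsFactors = FALSE))

# merge() is now an alias for join(); same joinExpr / joinType arguments
joined <- merge(df, df2, df$name == df2$name, "inner")

# summary() is now an alias for describe()
collect(summary(df))
```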
spark git commit: [SPARK-9855] [SPARKR] Add expression functions into SparkR whose params are simple
Repository: spark Updated Branches: refs/heads/master 0d1d146c2 -> f4bc01f1f [SPARK-9855] [SPARKR] Add expression functions into SparkR whose params are simple I added lots of expression functions for SparkR. This PR includes only functions whose params are only `(Column)` or `(Column, Column)`. And I think we need to improve how to test those functions. However, it would be better to work on another issue. ## Diff Summary - Add lots of functions in `functions.R` and their generic in `generic.R` - Add aliases for `ceiling` and `sign` - Move expression functions from `column.R` to `functions.R` - Modify `rdname` from `column` to `functions` I haven't supported the `not` function, because the name has a collision with the `testthat` package, and I haven't thought of a way to define it. ## New Supported Functions ``` approxCountDistinct ascii base64 bin bitwiseNOT ceil (alias: ceiling) crc32 dayofmonth dayofyear explode factorial hex hour initcap isNaN last_day length log2 ltrim md5 minute month negate quarter reverse round rtrim second sha1 signum (alias: sign) size soundex to_date trim unbase64 unhex weekofyear year datediff levenshtein months_between nanvl pmod ``` ## JIRA [[SPARK-9855] Add expression functions into SparkR whose params are simple - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9855) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8123 from yu-iskw/SPARK-9855. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f4bc01f1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f4bc01f1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f4bc01f1 Branch: refs/heads/master Commit: f4bc01f1f33a93e6affe5c8a3e33ffbd92d03f38 Parents: 0d1d146 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Wed Aug 12 18:33:27 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Aug 12 18:33:27 2015 -0700 -- R/pkg/DESCRIPTION| 1 + R/pkg/R/column.R | 81 --- R/pkg/R/functions.R | 123 ++ R/pkg/R/generics.R | 185 +++--- R/pkg/inst/tests/test_sparkSQL.R | 21 ++-- 5 files changed, 309 insertions(+), 102 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f4bc01f1/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 4949d86..83e6489 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -29,6 +29,7 @@ Collate: 'client.R' 'context.R' 'deserialize.R' +'functions.R' 'mllib.R' 'serialize.R' 'sparkR.R' http://git-wip-us.apache.org/repos/asf/spark/blob/f4bc01f1/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index eeaf9f1..328f595 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -60,12 +60,6 @@ operators <- list( ) column_functions1 <- c("asc", "desc", "isNull", "isNotNull") column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains") -functions <- c("min", "max", "sum", "avg", "mean", "count", "abs", "sqrt", - "first", "last", "lower", "upper", "sumDistinct", - "acos", "asin", "atan", "cbrt", "ceiling", "cos", "cosh", "exp", - "expm1", "floor", "log", "log10", "log1p", "rint", "sign", - "sin", "sinh", "tan", "tanh", "toDegrees", "toRadians") -binary_mathfunctions <- c("atan2", "hypot") createOperator <- function(op) { setMethod(op, @@ -111,33 +105,6 @@ createColumnFunction2 <- function(name) { }) } -createStaticFunction <- function(name) { - setMethod(name, -signature(x = "Column"), -function(x) { - if (name == "ceiling") { - name <- "ceil" - } - if (name == "sign") { - name <- "signum" - } - jc <- callJStatic("org.apache.spark.sql.functions", name, x@jc) - column(jc) -}) -} -createBinaryMathfunctions <-
function(name) { - setMethod(name, -signature(y = "Column"), -function(y, x) { - if (class(x) == "Column") { -x <- x@jc - } - jc <- callJStatic("org.apache.spark.sql.functions", name, y@jc, x) - column(jc) -}) -} - createMethods <- function() { for (op in names(operators)) { createOperator(op) @@ -148,12 +115,6 @@ createMethods <- function() { for (name in column_functions2) { createColumnFunction2(name) } - for (x in functions) { -createStaticFunction(x) - } - for (name in binary_mathfunctions) { -createBinaryMathfunctions(name) - } } createMethods() @@ -242,45 +203,3 @@ setMethod("%in%", jc <- callJMethod(x
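All of the generated wrappers dispatch on Column. A brief sketch using a few of the new functions, assuming an initialized `sqlContext`; the sample data is invented:

```r
df <- createDataFrame(sqlContext,
                      data.frame(name = c("Alice", "Bob"),
                                 ts = c("2015-08-12", "2015-08-13"),
                                 stringsAsFactors = FALSE))

collect(select(df,
               ascii(df$name),    # integer: ascii code of the first character
               md5(df$name),      # string: hex digest of the value
               reverse(df$name),  # string reversed
               to_date(df$ts)))   # string cast to DateType
```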
spark git commit: [SPARK-9855] [SPARKR] Add expression functions into SparkR whose params are simple
Repository: spark Updated Branches: refs/heads/branch-1.5 62ab2a4c6 -> ca39c9e91 [SPARK-9855] [SPARKR] Add expression functions into SparkR whose params are simple I added lots of expression functions for SparkR. This PR includes only functions whose params are only `(Column)` or `(Column, Column)`. And I think we need to improve how to test those functions. However, it would be better to work on another issue. ## Diff Summary - Add lots of functions in `functions.R` and their generic in `generic.R` - Add aliases for `ceiling` and `sign` - Move expression functions from `column.R` to `functions.R` - Modify `rdname` from `column` to `functions` I haven't supported the `not` function, because the name has a collision with the `testthat` package, and I haven't thought of a way to define it. ## New Supported Functions ``` approxCountDistinct ascii base64 bin bitwiseNOT ceil (alias: ceiling) crc32 dayofmonth dayofyear explode factorial hex hour initcap isNaN last_day length log2 ltrim md5 minute month negate quarter reverse round rtrim second sha1 signum (alias: sign) size soundex to_date trim unbase64 unhex weekofyear year datediff levenshtein months_between nanvl pmod ``` ## JIRA [[SPARK-9855] Add expression functions into SparkR whose params are simple - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9855) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8123 from yu-iskw/SPARK-9855. (cherry picked from commit f4bc01f1f33a93e6affe5c8a3e33ffbd92d03f38) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca39c9e9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca39c9e9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca39c9e9 Branch: refs/heads/branch-1.5 Commit: ca39c9e91602223f5665ab6942b917c4900bd996 Parents: 62ab2a4 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Wed Aug 12 18:33:27 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Aug 12 18:33:35 2015 -0700 -- R/pkg/DESCRIPTION| 1 + R/pkg/R/column.R | 81 --- R/pkg/R/functions.R | 123 ++ R/pkg/R/generics.R | 185 +++--- R/pkg/inst/tests/test_sparkSQL.R | 21 ++-- 5 files changed, 309 insertions(+), 102 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ca39c9e9/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 4949d86..83e6489 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -29,6 +29,7 @@ Collate: 'client.R' 'context.R' 'deserialize.R' +'functions.R' 'mllib.R' 'serialize.R' 'sparkR.R' http://git-wip-us.apache.org/repos/asf/spark/blob/ca39c9e9/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index eeaf9f1..328f595 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -60,12 +60,6 @@ operators <- list( ) column_functions1 <- c("asc", "desc", "isNull", "isNotNull") column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains") -functions <- c("min", "max", "sum", "avg", "mean", "count", "abs", "sqrt", - "first", "last", "lower", "upper", "sumDistinct", - "acos", "asin", "atan", "cbrt", "ceiling", "cos", "cosh", "exp", - "expm1", "floor", "log", "log10", "log1p", "rint", "sign", - "sin", "sinh", "tan", "tanh", "toDegrees", "toRadians") -binary_mathfunctions <- c("atan2", "hypot") createOperator <- function(op) { setMethod(op, @@ -111,33 +105,6 @@ createColumnFunction2 <- function(name) { }) } -createStaticFunction <- function(name) { - setMethod(name, -signature(x = "Column"), -function(x) { - if (name == "ceiling") { - name <- "ceil" - } - if (name == "sign") { - name
<- "signum" - } - jc <- callJStatic("org.apache.spark.sql.functions", name, x@jc) - column(jc) -}) -} - -createBinaryMathfunctions <- function(name) { - setMethod(name, -signature(y = "Column"), -function(y, x) { - if (class(x) == "Column") { -x <- x@jc - } - jc <- callJStatic("org.apache.spark.sql.functions", name, y@jc, x) - column(jc) -}) -} - createMethods <- function() { for (op in names(operators)) { createOperator(op) @@ -148,12 +115,6 @@ createMethods <- function() { for (name in column_functions2) { createColumnFunction2(name) } - for (x in functions) { -createStaticFunction(x) - } - for (name
spark git commit: [SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/create commands from codebase
Repository: spark Updated Branches: refs/heads/master d7053bea9 - 2fb4901b7 [SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/create commands from codebase sparkr.zip is now built by SparkSubmit on a need-to-build basis. cc shivaram Author: Burak Yavuz brk...@gmail.com Closes #8147 from brkyvz/make-dist-fix. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2fb4901b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2fb4901b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2fb4901b Branch: refs/heads/master Commit: 2fb4901b71cee65d40a43e61e3f4411c30cdefc3 Parents: d7053be Author: Burak Yavuz brk...@gmail.com Authored: Wed Aug 12 20:59:38 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Aug 12 20:59:38 2015 -0700 -- R/install-dev.bat| 5 - make-distribution.sh | 1 - 2 files changed, 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2fb4901b/R/install-dev.bat -- diff --git a/R/install-dev.bat b/R/install-dev.bat index f32670b..008a5c6 100644 --- a/R/install-dev.bat +++ b/R/install-dev.bat @@ -25,8 +25,3 @@ set SPARK_HOME=%~dp0.. MKDIR %SPARK_HOME%\R\lib R.exe CMD INSTALL --library=%SPARK_HOME%\R\lib %SPARK_HOME%\R\pkg\ - -rem Zip the SparkR package so that it can be distributed to worker nodes on YARN -pushd %SPARK_HOME%\R\lib -%JAVA_HOME%\bin\jar.exe cfM %SPARK_HOME%\R\lib\sparkr.zip SparkR -popd http://git-wip-us.apache.org/repos/asf/spark/blob/2fb4901b/make-distribution.sh -- diff --git a/make-distribution.sh b/make-distribution.sh index 4789b0e..247a813 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -219,7 +219,6 @@ cp -r $SPARK_HOME/ec2 $DISTDIR if [ -d $SPARK_HOME/R/lib/SparkR ]; then mkdir -p $DISTDIR/R/lib cp -r $SPARK_HOME/R/lib/SparkR $DISTDIR/R/lib - cp $SPARK_HOME/R/lib/sparkr.zip $DISTDIR/R/lib fi # Download and copy in tachyon, if requested - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/create commands from codebase
Repository: spark Updated Branches: refs/heads/branch-1.5 af470a757 - 3d1b9f007 [SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/create commands from codebase sparkr.zip is now built by SparkSubmit on a need-to-build basis. cc shivaram Author: Burak Yavuz brk...@gmail.com Closes #8147 from brkyvz/make-dist-fix. (cherry picked from commit 2fb4901b71cee65d40a43e61e3f4411c30cdefc3) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3d1b9f00 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3d1b9f00 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3d1b9f00 Branch: refs/heads/branch-1.5 Commit: 3d1b9f007b9b6a9bb4e146de32bd34affa723e12 Parents: af470a7 Author: Burak Yavuz brk...@gmail.com Authored: Wed Aug 12 20:59:38 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Aug 12 20:59:47 2015 -0700 -- R/install-dev.bat| 5 - make-distribution.sh | 1 - 2 files changed, 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3d1b9f00/R/install-dev.bat -- diff --git a/R/install-dev.bat b/R/install-dev.bat index f32670b..008a5c6 100644 --- a/R/install-dev.bat +++ b/R/install-dev.bat @@ -25,8 +25,3 @@ set SPARK_HOME=%~dp0.. MKDIR %SPARK_HOME%\R\lib R.exe CMD INSTALL --library=%SPARK_HOME%\R\lib %SPARK_HOME%\R\pkg\ - -rem Zip the SparkR package so that it can be distributed to worker nodes on YARN -pushd %SPARK_HOME%\R\lib -%JAVA_HOME%\bin\jar.exe cfM %SPARK_HOME%\R\lib\sparkr.zip SparkR -popd http://git-wip-us.apache.org/repos/asf/spark/blob/3d1b9f00/make-distribution.sh -- diff --git a/make-distribution.sh b/make-distribution.sh index 8589255..04ad005 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -219,7 +219,6 @@ cp -r $SPARK_HOME/ec2 $DISTDIR if [ -d $SPARK_HOME/R/lib/SparkR ]; then mkdir -p $DISTDIR/R/lib cp -r $SPARK_HOME/R/lib/SparkR $DISTDIR/R/lib - cp $SPARK_HOME/R/lib/sparkr.zip $DISTDIR/R/lib fi # Download and copy in tachyon, if requested - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8844] [SPARKR] head/collect is broken in SparkR.
Repository: spark Updated Branches: refs/heads/master 182f9b7a6 -> 5f9ce738f [SPARK-8844] [SPARKR] head/collect is broken in SparkR. This is a WIP patch for SPARK-8844 for collecting reviews. This bug is about reading an empty DataFrame. In readCol(), `lapply(1:numRows, function(x) {...})` does not take into consideration the case where numRows = 0. Will add unit test case. Author: Sun Rui rui@intel.com Closes #7419 from sun-rui/SPARK-8844. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5f9ce738 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5f9ce738 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5f9ce738 Branch: refs/heads/master Commit: 5f9ce738fe6bab3f0caffad0df1d3876178cf469 Parents: 182f9b7 Author: Sun Rui rui@intel.com Authored: Sun Aug 16 00:30:02 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sun Aug 16 00:30:02 2015 -0700 -- R/pkg/R/deserialize.R| 16 ++-- R/pkg/inst/tests/test_sparkSQL.R | 20 2 files changed, 30 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5f9ce738/R/pkg/R/deserialize.R -- diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index 6d364f7..33bf13e 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -176,10 +176,14 @@ readRow <- function(inputCon) { # Take a single column as Array[Byte] and deserialize it into an atomic vector readCol <- function(inputCon, numRows) { - # sapply can not work with POSIXlt - do.call(c, lapply(1:numRows, function(x) { -value <- readObject(inputCon) -# Replace NULL with NA so we can coerce to vectors -if (is.null(value)) NA else value - })) + if (numRows > 0) { +# sapply can not work with POSIXlt +do.call(c, lapply(1:numRows, function(x) { + value <- readObject(inputCon) + # Replace NULL with NA so we can coerce to vectors + if (is.null(value)) NA else value +})) + } else { +vector() + } } http://git-wip-us.apache.org/repos/asf/spark/blob/5f9ce738/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index e6d3b21..c77f633 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -408,6 +408,14 @@ test_that("collect() returns a data.frame", { expect_equal(names(rdf)[1], "age") expect_equal(nrow(rdf), 3) expect_equal(ncol(rdf), 2) + + # collect() returns data correctly from a DataFrame with 0 row + df0 <- limit(df, 0) + rdf <- collect(df0) + expect_true(is.data.frame(rdf)) + expect_equal(names(rdf)[1], "age") + expect_equal(nrow(rdf), 0) + expect_equal(ncol(rdf), 2) }) test_that("limit() returns DataFrame with the correct number of rows", { @@ -492,6 +500,18 @@ test_that("head() and first() return the correct data", { testFirst <- first(df) expect_equal(nrow(testFirst), 1) + + # head() and first() return the correct data on + # a DataFrame with 0 row + df0 <- limit(df, 0) + + testHead <- head(df0) + expect_equal(nrow(testHead), 0) + expect_equal(ncol(testHead), 2) + + testFirst <- first(df0) + expect_equal(nrow(testFirst), 0) + expect_equal(ncol(testFirst), 2) }) test_that("distinct() and unique on DataFrames", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8844] [SPARKR] head/collect is broken in SparkR.
Repository: spark Updated Branches: refs/heads/branch-1.5 881baf100 -> 4f75ce2e1 [SPARK-8844] [SPARKR] head/collect is broken in SparkR. This is a WIP patch for SPARK-8844 for collecting reviews. This bug is about reading an empty DataFrame. In readCol(), `lapply(1:numRows, function(x) {...})` does not take into consideration the case where numRows = 0. Will add unit test case. Author: Sun Rui rui@intel.com Closes #7419 from sun-rui/SPARK-8844. (cherry picked from commit 5f9ce738fe6bab3f0caffad0df1d3876178cf469) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f75ce2e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f75ce2e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f75ce2e Branch: refs/heads/branch-1.5 Commit: 4f75ce2e193c813f4e3ad067749b6e7b4f0ee135 Parents: 881baf1 Author: Sun Rui rui@intel.com Authored: Sun Aug 16 00:30:02 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sun Aug 16 00:30:10 2015 -0700 -- R/pkg/R/deserialize.R| 16 ++-- R/pkg/inst/tests/test_sparkSQL.R | 20 2 files changed, 30 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4f75ce2e/R/pkg/R/deserialize.R -- diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index 6d364f7..33bf13e 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -176,10 +176,14 @@ readRow <- function(inputCon) { # Take a single column as Array[Byte] and deserialize it into an atomic vector readCol <- function(inputCon, numRows) { - # sapply can not work with POSIXlt - do.call(c, lapply(1:numRows, function(x) { -value <- readObject(inputCon) -# Replace NULL with NA so we can coerce to vectors -if (is.null(value)) NA else value - })) + if (numRows > 0) { +# sapply can not work with POSIXlt +do.call(c, lapply(1:numRows, function(x) { + value <- readObject(inputCon) + # Replace NULL with NA so we can coerce to vectors + if (is.null(value)) NA else value +})) + } else { +vector() + } } http://git-wip-us.apache.org/repos/asf/spark/blob/4f75ce2e/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index e6d3b21..c77f633 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -408,6 +408,14 @@ test_that("collect() returns a data.frame", { expect_equal(names(rdf)[1], "age") expect_equal(nrow(rdf), 3) expect_equal(ncol(rdf), 2) + + # collect() returns data correctly from a DataFrame with 0 row + df0 <- limit(df, 0) + rdf <- collect(df0) + expect_true(is.data.frame(rdf)) + expect_equal(names(rdf)[1], "age") + expect_equal(nrow(rdf), 0) + expect_equal(ncol(rdf), 2) }) test_that("limit() returns DataFrame with the correct number of rows", { @@ -492,6 +500,18 @@ test_that("head() and first() return the correct data", { testFirst <- first(df) expect_equal(nrow(testFirst), 1) + + # head() and first() return the correct data on + # a DataFrame with 0 row + df0 <- limit(df, 0) + + testHead <- head(df0) + expect_equal(nrow(testHead), 0) + expect_equal(ncol(testHead), 2) + + testFirst <- first(df0) + expect_equal(nrow(testFirst), 0) + expect_equal(ncol(testFirst), 2) }) test_that("distinct() and unique on DataFrames", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
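The underlying gotcha is that `1:numRows` counts down when `numRows` is 0, so the supposedly empty loop body still runs. A two-line illustration of why the patch guards with `if (numRows > 0)`:

```r
numRows <- 0

length(1:numRows)         # 2 -- `1:0` counts down to c(1, 0), so lapply() runs twice
length(seq_len(numRows))  # 0 -- the usual idiom for "iterate numRows times"

# The patch takes the explicit-guard route instead, returning vector()
# (an empty atomic vector) for a zero-row column.
```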
spark git commit: [SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are complicated
Repository: spark Updated Branches: refs/heads/master f3e177917 - 2fcb9cb95 [SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are complicated I added lots of Column functinos into SparkR. And I also added `rand(seed: Int)` and `randn(seed: Int)` in Scala. Since we need such APIs for R integer type. ### JIRA [[SPARK-9856] Add expression functions into SparkR whose params are complicated - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9856) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8264 from yu-iskw/SPARK-9856-3. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2fcb9cb9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2fcb9cb9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2fcb9cb9 Branch: refs/heads/master Commit: 2fcb9cb9552dac1d78dcca5d4d5032b4fa6c985c Parents: f3e1779 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Wed Aug 19 10:41:14 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Aug 19 10:41:14 2015 -0700 -- R/pkg/NAMESPACE | 28 ++ R/pkg/R/functions.R | 415 +++ R/pkg/R/generics.R | 113 + R/pkg/inst/tests/test_sparkSQL.R| 98 - .../apache/spark/api/r/RBackendHandler.scala| 1 + 5 files changed, 649 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2fcb9cb9/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 8fa12d5..111a2dc 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -84,6 +84,7 @@ exportClasses(Column) exportMethods(abs, acos, + add_months, alias, approxCountDistinct, asc, @@ -101,12 +102,17 @@ exportMethods(abs, ceil, ceiling, concat, + concat_ws, contains, + conv, cos, cosh, count, countDistinct, crc32, + date_add, + date_format, + date_sub, datediff, dayofmonth, dayofyear, @@ -115,9 +121,14 @@ exportMethods(abs, exp, explode, expm1, + expr, factorial, first, floor, + format_number, + format_string, + from_unixtime, + from_utc_timestamp, getField, getItem, greatest, @@ -125,6 +136,7 @@ exportMethods(abs, hour, hypot, initcap, + instr, isNaN, isNotNull, isNull, @@ -135,11 +147,13 @@ exportMethods(abs, levenshtein, like, lit, + locate, log, log10, log1p, log2, lower, + lpad, ltrim, max, md5, @@ -152,16 +166,26 @@ exportMethods(abs, n_distinct, nanvl, negate, + next_day, otherwise, pmod, quarter, + rand, + randn, + regexp_extract, + regexp_replace, reverse, rint, rlike, round, + rpad, rtrim, second, sha1, + sha2, + shiftLeft, + shiftRight, + shiftRightUnsigned, sign, signum, sin, @@ -171,6 +195,7 @@ exportMethods(abs, sqrt, startsWith, substr, + substring_index, sum, sumDistinct, tan, @@ -178,9 +203,12 @@ exportMethods(abs, toDegrees, toRadians, to_date, + to_utc_timestamp, + translate, trim, unbase64, unhex, + unix_timestamp, upper, weekofyear, when, http://git-wip-us.apache.org/repos/asf/spark/blob/2fcb9cb9/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 366c230..5dba088 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -166,6 +166,421 @@ setMethod(n, signature(x
spark git commit: [SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are complicated
Repository: spark Updated Branches: refs/heads/branch-1.5 bebe63dfe - a8e880818 [SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are complicated I added lots of Column functinos into SparkR. And I also added `rand(seed: Int)` and `randn(seed: Int)` in Scala. Since we need such APIs for R integer type. ### JIRA [[SPARK-9856] Add expression functions into SparkR whose params are complicated - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9856) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8264 from yu-iskw/SPARK-9856-3. (cherry picked from commit 2fcb9cb9552dac1d78dcca5d4d5032b4fa6c985c) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a8e88081 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a8e88081 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a8e88081 Branch: refs/heads/branch-1.5 Commit: a8e8808181eec19f34783943ebb42cb8feb0e639 Parents: bebe63d Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Wed Aug 19 10:41:14 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Aug 19 10:41:22 2015 -0700 -- R/pkg/NAMESPACE | 28 ++ R/pkg/R/functions.R | 415 +++ R/pkg/R/generics.R | 113 + R/pkg/inst/tests/test_sparkSQL.R| 98 - .../apache/spark/api/r/RBackendHandler.scala| 1 + 5 files changed, 649 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a8e88081/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 8fa12d5..111a2dc 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -84,6 +84,7 @@ exportClasses(Column) exportMethods(abs, acos, + add_months, alias, approxCountDistinct, asc, @@ -101,12 +102,17 @@ exportMethods(abs, ceil, ceiling, concat, + concat_ws, contains, + conv, cos, cosh, count, countDistinct, crc32, + date_add, + date_format, + date_sub, datediff, dayofmonth, dayofyear, @@ -115,9 +121,14 @@ exportMethods(abs, exp, explode, expm1, + expr, factorial, first, floor, + format_number, + format_string, + from_unixtime, + from_utc_timestamp, getField, getItem, greatest, @@ -125,6 +136,7 @@ exportMethods(abs, hour, hypot, initcap, + instr, isNaN, isNotNull, isNull, @@ -135,11 +147,13 @@ exportMethods(abs, levenshtein, like, lit, + locate, log, log10, log1p, log2, lower, + lpad, ltrim, max, md5, @@ -152,16 +166,26 @@ exportMethods(abs, n_distinct, nanvl, negate, + next_day, otherwise, pmod, quarter, + rand, + randn, + regexp_extract, + regexp_replace, reverse, rint, rlike, round, + rpad, rtrim, second, sha1, + sha2, + shiftLeft, + shiftRight, + shiftRightUnsigned, sign, signum, sin, @@ -171,6 +195,7 @@ exportMethods(abs, sqrt, startsWith, substr, + substring_index, sum, sumDistinct, tan, @@ -178,9 +203,12 @@ exportMethods(abs, toDegrees, toRadians, to_date, + to_utc_timestamp, + translate, trim, unbase64, unhex, + unix_timestamp, upper, weekofyear, when, http://git-wip-us.apache.org/repos/asf/spark/blob/a8e88081/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R
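A short sketch of a few of the new wrappers with non-trivial parameters, assuming an initialized `sqlContext`; the sample data and format string are invented, and `rand(11L)` exercises the new `rand(seed: Int)` overload the commit mentions:

```r
df <- createDataFrame(sqlContext,
                      data.frame(t = c(0, 1439817600),
                                 s = c("a,b", "c,d"),
                                 stringsAsFactors = FALSE))

collect(select(df,
               from_unixtime(df$t, "yyyy-MM-dd"),  # takes a format-string parameter
               concat_ws("-", df$s, df$s),         # the separator comes first
               rand(11L)))                         # seeded random column, R integer seed
```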
spark git commit: [SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR
Repository: spark Updated Branches: refs/heads/master 28a98464e - d898c33f7 [SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR ### JIRA [[SPARK-10106] Add `ifelse` Column function to SparkR - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10106) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8303 from yu-iskw/SPARK-10106. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d898c33f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d898c33f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d898c33f Branch: refs/heads/master Commit: d898c33f774b9a3db2fb6aa8f0cb2c2ac6004b58 Parents: 28a9846 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Wed Aug 19 12:39:37 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Aug 19 12:39:37 2015 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 19 +++ R/pkg/inst/tests/test_sparkSQL.R | 3 ++- 3 files changed, 22 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d898c33f/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 111a2dc..3e5c89d 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -135,6 +135,7 @@ exportMethods(abs, hex, hour, hypot, + ifelse, initcap, instr, isNaN, http://git-wip-us.apache.org/repos/asf/spark/blob/d898c33f/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 5dba088..b5879bd 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -594,3 +594,22 @@ setMethod(when, signature(condition = Column, value = ANY), jc - callJStatic(org.apache.spark.sql.functions, when, condition, value) column(jc) }) + +#' ifelse +#' +#' Evaluates a list of conditions and returns `yes` if the conditions are satisfied. +#' Otherwise `no` is returned for unmatched conditions. +#' +#' @rdname column +setMethod(ifelse, + signature(test = Column, yes = ANY, no = ANY), + function(test, yes, no) { + test - test@jc + yes - ifelse(class(yes) == Column, yes@jc, yes) + no - ifelse(class(no) == Column, no@jc, no) + jc - callJMethod(callJStatic(org.apache.spark.sql.functions, +when, +test, yes), +otherwise, no) + column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/d898c33f/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 670017e..556b8c5 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -813,11 +813,12 @@ test_that(greatest() and least() on a DataFrame, { expect_equal(collect(select(df, least(df$a, df$b)))[, 1], c(1, 3)) }) -test_that(when() and otherwise() on a DataFrame, { +test_that(when(), otherwise() and ifelse() on a DataFrame, { l - list(list(a = 1, b = 2), list(a = 3, b = 4)) df - createDataFrame(sqlContext, l) expect_equal(collect(select(df, when(df$a 1 df$b 2, 1)))[, 1], c(NA, 1)) expect_equal(collect(select(df, otherwise(when(df$a 1, 1), 0)))[, 1], c(0, 1)) + expect_equal(collect(select(df, ifelse(df$a 1 df$b 2, 0, 1)))[, 1], c(1, 0)) }) test_that(group by, { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
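To make the behaviour concrete, here is a short usage sketch of the new `ifelse` Column method, mirroring the test case added in this diff; the `sqlContext` setup is assumed:

```
df <- createDataFrame(sqlContext, data.frame(a = c(1, 3), b = c(2, 4)))
# Behaves like base R's ifelse(), but builds a Column expression that Spark
# evaluates row by row: the yes-value where the test holds, the no-value otherwise.
collect(select(df, ifelse(df$a > 1 & df$b > 2, 0, 1)))   # returns c(1, 0)
```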
spark git commit: [SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR
Repository: spark Updated Branches: refs/heads/branch-1.5 f25c32475 - ba369258d [SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR ### JIRA [[SPARK-10106] Add `ifelse` Column function to SparkR - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10106) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8303 from yu-iskw/SPARK-10106. (cherry picked from commit d898c33f774b9a3db2fb6aa8f0cb2c2ac6004b58) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ba369258 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ba369258 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ba369258 Branch: refs/heads/branch-1.5 Commit: ba369258d94ba09b0bfc15d17f6851aa72a4d6d7 Parents: f25c324 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Wed Aug 19 12:39:37 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Aug 19 12:39:44 2015 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 19 +++ R/pkg/inst/tests/test_sparkSQL.R | 3 ++- 3 files changed, 22 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ba369258/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 111a2dc..3e5c89d 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -135,6 +135,7 @@ exportMethods(abs, hex, hour, hypot, + ifelse, initcap, instr, isNaN, http://git-wip-us.apache.org/repos/asf/spark/blob/ba369258/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 5dba088..b5879bd 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -594,3 +594,22 @@ setMethod(when, signature(condition = Column, value = ANY), jc - callJStatic(org.apache.spark.sql.functions, when, condition, value) column(jc) }) + +#' ifelse +#' +#' Evaluates a list of conditions and returns `yes` if the conditions are satisfied. +#' Otherwise `no` is returned for unmatched conditions. +#' +#' @rdname column +setMethod(ifelse, + signature(test = Column, yes = ANY, no = ANY), + function(test, yes, no) { + test - test@jc + yes - ifelse(class(yes) == Column, yes@jc, yes) + no - ifelse(class(no) == Column, no@jc, no) + jc - callJMethod(callJStatic(org.apache.spark.sql.functions, +when, +test, yes), +otherwise, no) + column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/ba369258/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 670017e..556b8c5 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -813,11 +813,12 @@ test_that(greatest() and least() on a DataFrame, { expect_equal(collect(select(df, least(df$a, df$b)))[, 1], c(1, 3)) }) -test_that(when() and otherwise() on a DataFrame, { +test_that(when(), otherwise() and ifelse() on a DataFrame, { l - list(list(a = 1, b = 2), list(a = 3, b = 4)) df - createDataFrame(sqlContext, l) expect_equal(collect(select(df, when(df$a 1 df$b 2, 1)))[, 1], c(NA, 1)) expect_equal(collect(select(df, otherwise(when(df$a 1, 1), 0)))[, 1], c(0, 1)) + expect_equal(collect(select(df, ifelse(df$a 1 df$b 2, 0, 1)))[, 1], c(1, 0)) }) test_that(group by, { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR] [MINOR] Get rid of a long line warning
Repository: spark Updated Branches: refs/heads/branch-1.5 9b42e2404 - 0a1385e31 [SPARKR] [MINOR] Get rid of a long line warning ``` R/functions.R:74:1: style: lines should not be more than 100 characters. jc - callJStatic(org.apache.spark.sql.functions, lit, ifelse(class(x) == Column, x@jc, x)) ^ ``` Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8297 from yu-iskw/minor-lint-r. (cherry picked from commit b4b35f133aecaf84f04e8e444b660a33c6b7894a) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0a1385e3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0a1385e3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0a1385e3 Branch: refs/heads/branch-1.5 Commit: 0a1385e319a2bca115b6bfefe7820b78ce5fb753 Parents: 9b42e24 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Tue Aug 18 19:18:05 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Aug 18 19:18:13 2015 -0700 -- R/pkg/R/functions.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0a1385e3/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 6eef4d6..e606b20 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -71,7 +71,9 @@ createFunctions() #' @return Creates a Column class of literal value. setMethod(lit, signature(ANY), function(x) { -jc - callJStatic(org.apache.spark.sql.functions, lit, ifelse(class(x) == Column, x@jc, x)) +jc - callJStatic(org.apache.spark.sql.functions, + lit, + ifelse(class(x) == Column, x@jc, x)) column(jc) }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR] [MINOR] Get rid of a long line warning
Repository: spark Updated Branches: refs/heads/master 1f8902964 - b4b35f133 [SPARKR] [MINOR] Get rid of a long line warning ``` R/functions.R:74:1: style: lines should not be more than 100 characters. jc - callJStatic(org.apache.spark.sql.functions, lit, ifelse(class(x) == Column, x@jc, x)) ^ ``` Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8297 from yu-iskw/minor-lint-r. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b4b35f13 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b4b35f13 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b4b35f13 Branch: refs/heads/master Commit: b4b35f133aecaf84f04e8e444b660a33c6b7894a Parents: 1f89029 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Tue Aug 18 19:18:05 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Aug 18 19:18:05 2015 -0700 -- R/pkg/R/functions.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b4b35f13/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 6eef4d6..e606b20 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -71,7 +71,9 @@ createFunctions() #' @return Creates a Column class of literal value. setMethod(lit, signature(ANY), function(x) { -jc - callJStatic(org.apache.spark.sql.functions, lit, ifelse(class(x) == Column, x@jc, x)) +jc - callJStatic(org.apache.spark.sql.functions, + lit, + ifelse(class(x) == Column, x@jc, x)) column(jc) }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
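As a side note, a small sketch of what the reformatted `lit` does in practice; the data frame here is an assumption for illustration:

```
df <- createDataFrame(sqlContext, data.frame(x = c(1, 2)))
# lit() wraps an R literal as a Column (or unwraps a Column's Java reference),
# so constants can appear inside column expressions:
collect(select(df, df$x + lit(10)))   # adds the constant 10 to every row
```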
spark git commit: [SPARK-10075] [SPARKR] Add `when` expression function in SparkR
Repository: spark Updated Branches: refs/heads/master a5b5b9365 - bf32c1f7f [SPARK-10075] [SPARKR] Add `when` expression function in SparkR - Add `when` and `otherwise` as `Column` methods - Add `When` as an expression function - Add `%otherwise%` infix as an alias of `otherwise` Since R doesn't support a feature like method chaining, the `otherwise(when(condition, value), value)` style is a little annoying for me. If `%otherwise%` looks strange to shivaram, I can remove it. What do you think? ### JIRA [[SPARK-10075] Add `when` expression function in SparkR - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10075) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8266 from yu-iskw/SPARK-10075. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bf32c1f7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bf32c1f7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bf32c1f7 Branch: refs/heads/master Commit: bf32c1f7f47dd907d787469f979c5859e02ce5e6 Parents: a5b5b93 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Tue Aug 18 20:27:36 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Aug 18 20:27:36 2015 -0700 -- R/pkg/NAMESPACE | 2 ++ R/pkg/R/column.R | 14 ++ R/pkg/R/functions.R | 14 ++ R/pkg/R/generics.R | 8 R/pkg/inst/tests/test_sparkSQL.R | 7 +++ 5 files changed, 45 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 607aef2..8fa12d5 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -152,6 +152,7 @@ exportMethods(abs, n_distinct, nanvl, negate, + otherwise, pmod, quarter, reverse, @@ -182,6 +183,7 @@ exportMethods(abs, unhex, upper, weekofyear, + when, year) exportClasses(GroupedData) http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 328f595..5a07ebd 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -203,3 +203,17 @@ setMethod(%in%, jc - callJMethod(x@jc, in, table) return(column(jc)) }) + +#' otherwise +#' +#' If values in the specified column are null, returns the value. +#' Can be used in conjunction with `when` to specify a default value for expressions. +#' +#' @rdname column +setMethod(otherwise, + signature(x = Column, value = ANY), + function(x, value) { +value - ifelse(class(value) == Column, value@jc, value) +jc - callJMethod(x@jc, otherwise, value) +column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index e606b20..366c230 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -165,3 +165,17 @@ setMethod(n, signature(x = Column), function(x) { count(x) }) + +#' when +#' +#' Evaluates a list of conditions and returns one of multiple possible result expressions. +#' For unmatched expressions null is returned. +#' +#' @rdname column +setMethod(when, signature(condition = Column, value = ANY), + function(condition, value) { + condition - condition@jc + value - ifelse(class(value) == Column, value@jc, value) + jc - callJStatic(org.apache.spark.sql.functions, when, condition, value) + column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 5c1cc98..338b32e 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -651,6 +651,14 @@ setGeneric(rlike, function(x, ...)
{ standardGeneric(rlike) }) #' @export setGeneric(startsWith, function(x, ...) { standardGeneric(startsWith) }) +#' @rdname column +#' @export +setGeneric(when, function(condition, value) { standardGeneric(when) }) + +#' @rdname column +#' @export +setGeneric(otherwise, function(x, value) { standardGeneric(otherwise) }) + ## Expression Function Methods ## http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/inst/tests/test_sparkSQL.R
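A short usage sketch of the two new methods, matching the tests added in this diff; the session setup is assumed:

```
df <- createDataFrame(sqlContext, data.frame(a = c(1, 3)))
# when() alone leaves unmatched rows as null (NA once collected) ...
collect(select(df, when(df$a > 1, 1)))                 # c(NA, 1)
# ... while otherwise() supplies the default for those rows.
collect(select(df, otherwise(when(df$a > 1, 1), 0)))   # c(0, 1)
```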
spark git commit: [SPARK-10075] [SPARKR] Add `when` expression function in SparkR
Repository: spark Updated Branches: refs/heads/branch-1.5 bb2fb59f9 - ebaeb1892 [SPARK-10075] [SPARKR] Add `when` expression function in SparkR - Add `when` and `otherwise` as `Column` methods - Add `When` as an expression function - Add `%otherwise%` infix as an alias of `otherwise` Since R doesn't support a feature like method chaining, the `otherwise(when(condition, value), value)` style is a little annoying for me. If `%otherwise%` looks strange to shivaram, I can remove it. What do you think? ### JIRA [[SPARK-10075] Add `when` expression function in SparkR - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10075) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8266 from yu-iskw/SPARK-10075. (cherry picked from commit bf32c1f7f47dd907d787469f979c5859e02ce5e6) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ebaeb189 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ebaeb189 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ebaeb189 Branch: refs/heads/branch-1.5 Commit: ebaeb189260dd338fc5a91d8ec3ff6d45989991a Parents: bb2fb59 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Tue Aug 18 20:27:36 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Aug 18 20:29:34 2015 -0700 -- R/pkg/NAMESPACE | 2 ++ R/pkg/R/column.R | 14 ++ R/pkg/R/functions.R | 14 ++ R/pkg/R/generics.R | 8 R/pkg/inst/tests/test_sparkSQL.R | 7 +++ 5 files changed, 45 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 607aef2..8fa12d5 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -152,6 +152,7 @@ exportMethods(abs, n_distinct, nanvl, negate, + otherwise, pmod, quarter, reverse, @@ -182,6 +183,7 @@ exportMethods(abs, unhex, upper, weekofyear, + when, year) exportClasses(GroupedData) http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 328f595..5a07ebd 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -203,3 +203,17 @@ setMethod(%in%, jc - callJMethod(x@jc, in, table) return(column(jc)) }) + +#' otherwise +#' +#' If values in the specified column are null, returns the value. +#' Can be used in conjunction with `when` to specify a default value for expressions. +#' +#' @rdname column +setMethod(otherwise, + signature(x = Column, value = ANY), + function(x, value) { +value - ifelse(class(value) == Column, value@jc, value) +jc - callJMethod(x@jc, otherwise, value) +column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index e606b20..366c230 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -165,3 +165,17 @@ setMethod(n, signature(x = Column), function(x) { count(x) }) + +#' when +#' +#' Evaluates a list of conditions and returns one of multiple possible result expressions. +#' For unmatched expressions null is returned. 
+#' +#' @rdname column +setMethod(when, signature(condition = Column, value = ANY), + function(condition, value) { + condition - condition@jc + value - ifelse(class(value) == Column, value@jc, value) + jc - callJStatic(org.apache.spark.sql.functions, when, condition, value) + column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 5c1cc98..338b32e 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -651,6 +651,14 @@ setGeneric(rlike, function(x, ...) { standardGeneric(rlike) }) #' @export setGeneric(startsWith, function(x, ...) { standardGeneric(startsWith) }) +#' @rdname column +#' @export +setGeneric(when, function(condition, value) { standardGeneric(when) }) + +#' @rdname column +#' @export +setGeneric(otherwise, function(x, value) { standardGeneric(otherwise) }) + ## Expression Function Methods
spark git commit: [SPARK-9982] [SPARKR] SparkR DataFrame fails to return data of Decimal type
Repository: spark Updated Branches: refs/heads/branch-1.5 257e9d727 - a7027e6d3 [SPARK-9982] [SPARKR] SparkR DataFrame fails to return data of Decimal type Author: Alex Shkurenko ashkure...@enova.com Closes #8239 from ashkurenko/master. (cherry picked from commit 39e91fe2fd43044cc734d55625a3c03284b69f09) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a7027e6d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a7027e6d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a7027e6d Branch: refs/heads/branch-1.5 Commit: a7027e6d3369a1157c53557c8215273606086d84 Parents: 257e9d7 Author: Alex Shkurenko ashkure...@enova.com Authored: Thu Aug 20 10:16:38 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Thu Aug 20 10:16:57 2015 -0700 -- core/src/main/scala/org/apache/spark/api/r/SerDe.scala | 5 + 1 file changed, 5 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a7027e6d/core/src/main/scala/org/apache/spark/api/r/SerDe.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala index d5b4260..3c89f24 100644 --- a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala +++ b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala @@ -181,6 +181,7 @@ private[spark] object SerDe { // Boolean - logical // Float - double // Double - double + // Decimal - double // Long - double // Array[Byte] - raw // Date - Date @@ -219,6 +220,10 @@ private[spark] object SerDe { case float | java.lang.Float = writeType(dos, double) writeDouble(dos, value.asInstanceOf[Float].toDouble) +case decimal | java.math.BigDecimal = + writeType(dos, double) + val javaDecimal = value.asInstanceOf[java.math.BigDecimal] + writeDouble(dos, scala.math.BigDecimal(javaDecimal).toDouble) case double | java.lang.Double = writeType(dos, double) writeDouble(dos, value.asInstanceOf[Double]) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9982] [SPARKR] SparkR DataFrame fails to return data of Decimal type
Repository: spark Updated Branches: refs/heads/master 52c60537a - 39e91fe2f [SPARK-9982] [SPARKR] SparkR DataFrame fails to return data of Decimal type Author: Alex Shkurenko ashkure...@enova.com Closes #8239 from ashkurenko/master. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/39e91fe2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/39e91fe2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/39e91fe2 Branch: refs/heads/master Commit: 39e91fe2fd43044cc734d55625a3c03284b69f09 Parents: 52c6053 Author: Alex Shkurenko ashkure...@enova.com Authored: Thu Aug 20 10:16:38 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Thu Aug 20 10:16:38 2015 -0700 -- core/src/main/scala/org/apache/spark/api/r/SerDe.scala | 5 + 1 file changed, 5 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/39e91fe2/core/src/main/scala/org/apache/spark/api/r/SerDe.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala index d5b4260..3c89f24 100644 --- a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala +++ b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala @@ -181,6 +181,7 @@ private[spark] object SerDe { // Boolean - logical // Float - double // Double - double + // Decimal - double // Long - double // Array[Byte] - raw // Date - Date @@ -219,6 +220,10 @@ private[spark] object SerDe { case float | java.lang.Float = writeType(dos, double) writeDouble(dos, value.asInstanceOf[Float].toDouble) +case decimal | java.math.BigDecimal = + writeType(dos, double) + val javaDecimal = value.asInstanceOf[java.math.BigDecimal] + writeDouble(dos, scala.math.BigDecimal(javaDecimal).toDouble) case double | java.lang.Double = writeType(dos, double) writeDouble(dos, value.asInstanceOf[Double]) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
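Seen from the R side, the effect of the SerDe change is that a DECIMAL column now collects as an ordinary double instead of failing. A hedged sketch; the query is illustrative only:

```
df <- sql(sqlContext, "SELECT CAST(1.23 AS DECIMAL(10, 2)) AS d")
res <- collect(df)
class(res$d)   # "numeric": the JVM java.math.BigDecimal is written back as an R double
res$d          # 1.23
```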
spark git commit: Bump SparkR version string to 1.5.0
Repository: spark Updated Branches: refs/heads/master badf7fa65 - 04e0fea79 Bump SparkR version string to 1.5.0 This patch is against master, but we need to apply it to 1.5 branch as well. cc shivaram and rxin Author: Hossein hoss...@databricks.com Closes #8291 from falaki/SparkRVersion1.5. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/04e0fea7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/04e0fea7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/04e0fea7 Branch: refs/heads/master Commit: 04e0fea79b9acfa3a3cb81dbacb08f9d287b42c3 Parents: badf7fa Author: Hossein hoss...@databricks.com Authored: Tue Aug 18 18:02:22 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Aug 18 18:02:22 2015 -0700 -- R/pkg/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/04e0fea7/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 83e6489..d0d7201 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,7 +1,7 @@ Package: SparkR Type: Package Title: R frontend for Spark -Version: 1.4.0 +Version: 1.5.0 Date: 2013-09-09 Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman shiva...@cs.berkeley.edu - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Bump SparkR version string to 1.5.0
Repository: spark Updated Branches: refs/heads/branch-1.5 4ee225af8 - 9b42e2404 Bump SparkR version string to 1.5.0 This patch is against master, but we need to apply it to 1.5 branch as well. cc shivaram and rxin Author: Hossein hoss...@databricks.com Closes #8291 from falaki/SparkRVersion1.5. (cherry picked from commit 04e0fea79b9acfa3a3cb81dbacb08f9d287b42c3) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9b42e240 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9b42e240 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9b42e240 Branch: refs/heads/branch-1.5 Commit: 9b42e24049e072b315ec80e5bbe2ec5079a94704 Parents: 4ee225a Author: Hossein hoss...@databricks.com Authored: Tue Aug 18 18:02:22 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Aug 18 18:02:31 2015 -0700 -- R/pkg/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9b42e240/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 83e6489..d0d7201 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,7 +1,7 @@ Package: SparkR Type: Package Title: R frontend for Spark -Version: 1.4.0 +Version: 1.5.0 Date: 2013-09-09 Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman shiva...@cs.berkeley.edu - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple parameters functions
Repository: spark Updated Branches: refs/heads/master 5723d26d7 - 1968276af [SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple parameters functions ### JIRA [[SPARK-10007] Update `NAMESPACE` file in SparkR for simple parameters functions - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10007) Author: Yuu ISHIKAWA yuu.ishik...@gmail.com Closes #8277 from yu-iskw/SPARK-10007. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1968276a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1968276a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1968276a Branch: refs/heads/master Commit: 1968276af0f681fe51328b7dd795bd21724a5441 Parents: 5723d26 Author: Yuu ISHIKAWA yuu.ishik...@gmail.com Authored: Tue Aug 18 09:10:59 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Aug 18 09:10:59 2015 -0700 -- R/pkg/NAMESPACE | 50 +++--- 1 file changed, 47 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1968276a/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index fd9dfdf..607aef2 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -87,48 +87,86 @@ exportMethods(abs, alias, approxCountDistinct, asc, + ascii, asin, atan, atan2, avg, + base64, between, + bin, + bitwiseNOT, cast, cbrt, + ceil, ceiling, + concat, contains, cos, cosh, - concat, + count, countDistinct, + crc32, + datediff, + dayofmonth, + dayofyear, desc, endsWith, exp, + explode, expm1, + factorial, + first, floor, getField, getItem, greatest, + hex, + hour, hypot, + initcap, + isNaN, isNotNull, isNull, - lit, last, + last_day, least, + length, + levenshtein, like, + lit, log, log10, log1p, + log2, lower, + ltrim, max, + md5, mean, min, + minute, + month, + months_between, n, n_distinct, + nanvl, + negate, + pmod, + quarter, + reverse, rint, rlike, + round, + rtrim, + second, + sha1, sign, + signum, sin, sinh, + size, + soundex, sqrt, startsWith, substr, @@ -138,7 +176,13 @@ exportMethods(abs, tanh, toDegrees, toRadians, - upper) + to_date, + trim, + unbase64, + unhex, + upper, + weekofyear, + year) exportClasses(GroupedData) exportMethods(agg) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
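With the export list updated, these previously-hidden generics become callable directly from a SparkR session. A small illustrative sketch under assumed data (note `md5` relies on Spark's implicit string-to-binary cast):

```
df <- createDataFrame(sqlContext,
                      data.frame(s = "Spark", n = -1, stringsAsFactors = FALSE))
collect(select(df,
               md5(df$s),       # hex digest of the string column
               initcap(df$s),   # capitalize the first letter of each word
               negate(df$n),    # arithmetic negation, i.e. 1
               signum(df$n)))   # sign of the value, i.e. -1
```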
spark git commit: [SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple parameters functions
Repository: spark Updated Branches: refs/heads/branch-1.5 a512250cd - 20a760a00 [SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple parameters functions ### JIRA [[SPARK-10007] Update `NAMESPACE` file in SparkR for simple parameters functions - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10007) Author: Yuu ISHIKAWA yuu.ishik...@gmail.com Closes #8277 from yu-iskw/SPARK-10007. (cherry picked from commit 1968276af0f681fe51328b7dd795bd21724a5441) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/20a760a0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/20a760a0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/20a760a0 Branch: refs/heads/branch-1.5 Commit: 20a760a00ae188a68b877f052842834e8b7570e6 Parents: a512250 Author: Yuu ISHIKAWA yuu.ishik...@gmail.com Authored: Tue Aug 18 09:10:59 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Aug 18 09:11:22 2015 -0700 -- R/pkg/NAMESPACE | 50 +++--- 1 file changed, 47 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/20a760a0/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index fd9dfdf..607aef2 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -87,48 +87,86 @@ exportMethods(abs, alias, approxCountDistinct, asc, + ascii, asin, atan, atan2, avg, + base64, between, + bin, + bitwiseNOT, cast, cbrt, + ceil, ceiling, + concat, contains, cos, cosh, - concat, + count, countDistinct, + crc32, + datediff, + dayofmonth, + dayofyear, desc, endsWith, exp, + explode, expm1, + factorial, + first, floor, getField, getItem, greatest, + hex, + hour, hypot, + initcap, + isNaN, isNotNull, isNull, - lit, last, + last_day, least, + length, + levenshtein, like, + lit, log, log10, log1p, + log2, lower, + ltrim, max, + md5, mean, min, + minute, + month, + months_between, n, n_distinct, + nanvl, + negate, + pmod, + quarter, + reverse, rint, rlike, + round, + rtrim, + second, + sha1, sign, + signum, sin, sinh, + size, + soundex, sqrt, startsWith, substr, @@ -138,7 +176,13 @@ exportMethods(abs, tanh, toDegrees, toRadians, - upper) + to_date, + trim, + unbase64, + unhex, + upper, + weekofyear, + year) exportClasses(GroupedData) exportMethods(agg) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9053] [SPARKR] Fix spaces around parens, infix operators etc.
Repository: spark Updated Branches: refs/heads/master 6bba7509a - fc0e57e5a [SPARK-9053] [SPARKR] Fix spaces around parens, infix operators etc. ### JIRA [[SPARK-9053] Fix spaces around parens, infix operators etc. - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9053) ### The Result of `lint-r` [The result of lint-r at the revision:a4c83cb1e4b066cd60264b6572fd3e51d160d26a](https://gist.github.com/yu-iskw/d253d7f8ef351f86443d) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #7584 from yu-iskw/SPARK-9053 and squashes the following commits: 613170f [Yu ISHIKAWA] Ignore a warning about a space before a left parenthesis ede61e1 [Yu ISHIKAWA] Ignores two warnings about a space before a left parenthesis. TODO: After updating `lintr`, we will remove the ignores de3e0db [Yu ISHIKAWA] Add '## nolint start' '## nolint end' statement to ignore infix space warnings e233ea8 [Yu ISHIKAWA] [SPARK-9053][SparkR] Fix spaces around parens, infix operators etc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc0e57e5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc0e57e5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc0e57e5 Branch: refs/heads/master Commit: fc0e57e5aba82a3f227fef05a843283e2ec893fc Parents: 6bba750 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Fri Jul 31 09:33:38 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jul 31 09:33:38 2015 -0700 -- R/pkg/R/DataFrame.R | 4 R/pkg/R/RDD.R | 7 +-- R/pkg/R/column.R| 2 +- R/pkg/R/context.R | 2 +- R/pkg/R/pairRDD.R | 2 +- R/pkg/R/utils.R | 4 ++-- R/pkg/inst/tests/test_binary_function.R | 2 +- R/pkg/inst/tests/test_rdd.R | 6 +++--- R/pkg/inst/tests/test_sparkSQL.R| 4 +++- 9 files changed, 21 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fc0e57e5/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index f4c93d3..b31ad37 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1322,9 +1322,11 @@ setMethod(write.df, org.apache.spark.sql.parquet) } allModes - c(append, overwrite, error, ignore) +# nolint start if (!(mode %in% allModes)) { stop('mode should be one of append, overwrite, error, ignore') } +# nolint end jmode - callJStatic(org.apache.spark.sql.api.r.SQLUtils, saveMode, mode) options - varargsToEnv(...) if (!is.null(path)) { @@ -1384,9 +1386,11 @@ setMethod(saveAsTable, org.apache.spark.sql.parquet) } allModes - c(append, overwrite, error, ignore) +# nolint start if (!(mode %in% allModes)) { stop('mode should be one of append, overwrite, error, ignore') } +# nolint end jmode - callJStatic(org.apache.spark.sql.api.r.SQLUtils, saveMode, mode) options - varargsToEnv(...)
callJMethod(df@sdf, saveAsTable, tableName, source, jmode, options) http://git-wip-us.apache.org/repos/asf/spark/blob/fc0e57e5/R/pkg/R/RDD.R -- diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index d2d0967..2a013b3 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -85,7 +85,9 @@ setMethod(initialize, PipelinedRDD, function(.Object, prev, func, jrdd_val) isPipelinable - function(rdd) { e - rdd@env +# nolint start !(e$isCached || e$isCheckpointed) +# nolint end } if (!inherits(prev, PipelinedRDD) || !isPipelinable(prev)) { @@ -97,7 +99,8 @@ setMethod(initialize, PipelinedRDD, function(.Object, prev, func, jrdd_val) # prev_serializedMode is used during the delayed computation of JRDD in getJRDD } else { pipelinedFunc - function(partIndex, part) { - func(partIndex, prev@func(partIndex, part)) + f - prev@func + func(partIndex, f(partIndex, part)) } .Object@func - cleanClosure(pipelinedFunc) .Object@prev_jrdd - prev@prev_jrdd # maintain the pipeline @@ -841,7 +844,7 @@ setMethod(sampleRDD, if (withReplacement) { count - rpois(1, fraction) if (count 0) { -res[(len + 1):(len + count)] - rep(list(elem), count) +res[ (len + 1) : (len + count) ] - rep(list(elem), count) len - len + count
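For readers unfamiliar with the `lintr` escape hatch used in this diff: statements wrapped between the two marker comments are skipped by the linter. A minimal standalone sketch; the surrounding function is hypothetical:

```
check_mode <- function(mode) {
  allModes <- c("append", "overwrite", "error", "ignore")
  # nolint start
  if (!(mode %in% allModes)) {
    stop('mode should be one of "append", "overwrite", "error", "ignore"')
  }
  # nolint end
  mode
}
```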