git commit: Fixing typo in als.py
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 6f701ff55 -> 98944a973


Fixing typo in als.py

XtY should be Xty.

Author: Evan Sparks <evan.spa...@gmail.com>

Closes #696 from etrain/patch-2 and squashes the following commits:

634cb8d [Evan Sparks] Fixing typo in als.py


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/98944a97
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/98944a97
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/98944a97

Branch: refs/heads/branch-1.0
Commit: 98944a9734389cd4400516a1eb3afa5376f44927
Parents: 6f701ff
Author: Evan Sparks <evan.spa...@gmail.com>
Authored: Thu May 8 13:07:30 2014 -0700
Committer: Shivaram Venkataraman <shiva...@eecs.berkeley.edu>
Committed: Thu May 8 16:49:33 2014 -0700

----------------------------------------------------------------------
 examples/src/main/python/als.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/98944a97/examples/src/main/python/als.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/als.py b/examples/src/main/python/als.py
index 33700ab..01552dc 100755
--- a/examples/src/main/python/als.py
+++ b/examples/src/main/python/als.py
@@ -38,7 +38,7 @@ def update(i, vec, mat, ratings):
     ff = mat.shape[1]

     XtX = mat.T * mat
-    XtY = mat.T * ratings[i, :].T
+    Xty = mat.T * ratings[i, :].T

     for j in range(ff):
         XtX[j,j] += LAMBDA * uu
git commit: Fixing typo in als.py
Repository: spark
Updated Branches:
  refs/heads/master c3f8b78c2 -> 5c5e7d580


Fixing typo in als.py

XtY should be Xty.

Author: Evan Sparks <evan.spa...@gmail.com>

Closes #696 from etrain/patch-2 and squashes the following commits:

634cb8d [Evan Sparks] Fixing typo in als.py


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5c5e7d58
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5c5e7d58
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5c5e7d58

Branch: refs/heads/master
Commit: 5c5e7d5809d337ce41a7a90eb9201e12803aba48
Parents: c3f8b78
Author: Evan Sparks <evan.spa...@gmail.com>
Authored: Thu May 8 13:07:30 2014 -0700
Committer: Shivaram Venkataraman <shiva...@eecs.berkeley.edu>
Committed: Thu May 8 13:07:30 2014 -0700

----------------------------------------------------------------------
 examples/src/main/python/als.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5c5e7d58/examples/src/main/python/als.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/als.py b/examples/src/main/python/als.py
index 33700ab..01552dc 100755
--- a/examples/src/main/python/als.py
+++ b/examples/src/main/python/als.py
@@ -38,7 +38,7 @@ def update(i, vec, mat, ratings):
     ff = mat.shape[1]

     XtX = mat.T * mat
-    XtY = mat.T * ratings[i, :].T
+    Xty = mat.T * ratings[i, :].T

     for j in range(ff):
         XtX[j,j] += LAMBDA * uu
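The rename matters because `Xty` is the right-hand side of the regularized normal equations that the example's `update` function builds up. A minimal NumPy sketch of that computation, assuming (as the example does) module-level `LAMBDA` and user-count `uu` constants, and using ndarrays with `@` where als.py uses `np.matrix` and `*`:

```python
import numpy as np

LAMBDA = 0.01   # regularization constant; als.py defines a similar module-level value
uu = 100        # number of users; a stand-in for the example's global

def update(i, mat, ratings):
    # One ALS half-step for row i: solve the ridge-regularized normal
    # equations (XtX + lambda*uu*I) w = Xty.
    ff = mat.shape[1]
    XtX = mat.T @ mat                # ff x ff Gram matrix
    Xty = mat.T @ ratings[i, :]      # right-hand side -- the vector the typo renamed
    XtX += np.eye(ff) * LAMBDA * uu  # ridge term on the diagonal
    return np.linalg.solve(XtX, Xty)

M, U, F = 5, 100, 3
w = update(0, np.random.rand(U, F), np.random.rand(M, U))
print(w.shape)  # (3,)
```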
git commit: [SPARK-2950] Add gc time and shuffle write time to JobLogger
Repository: spark
Updated Branches:
  refs/heads/master 3570119c3 -> 1d03a26a4


[SPARK-2950] Add gc time and shuffle write time to JobLogger

The JobLogger is very useful for performing offline performance profiling of Spark jobs. GC time and shuffle write time are available in TaskMetrics but are currently missing from the JobLogger output. This patch adds these two fields.

~~Since this is a small change, I didn't create a JIRA. Let me know if I should do that.~~

cc kayousterhout

Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>

Closes #1869 from shivaram/job-logger and squashes the following commits:

1b709fc [Shivaram Venkataraman] Add a space before GC_TIME
c418105 [Shivaram Venkataraman] Add gc time and shuffle write time to JobLogger


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1d03a26a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1d03a26a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1d03a26a

Branch: refs/heads/master
Commit: 1d03a26a4895c24ebfab1a3cf6656af75cb53003
Parents: 3570119
Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Authored: Sun Aug 10 12:44:17 2014 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Sun Aug 10 12:44:17 2014 -0700

----------------------------------------------------------------------
 core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/1d03a26a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala
index 47dd112..4d6b5c8 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala
@@ -162,6 +162,7 @@ class JobLogger(val user: String, val logDirName: String) extends SparkListener
       " START_TIME=" + taskInfo.launchTime + " FINISH_TIME=" + taskInfo.finishTime +
       " EXECUTOR_ID=" + taskInfo.executorId + " HOST=" + taskMetrics.hostname
     val executorRunTime = " EXECUTOR_RUN_TIME=" + taskMetrics.executorRunTime
+    val gcTime = " GC_TIME=" + taskMetrics.jvmGCTime
     val inputMetrics = taskMetrics.inputMetrics match {
       case Some(metrics) =>
         " READ_METHOD=" + metrics.readMethod.toString +
@@ -179,11 +180,13 @@ class JobLogger(val user: String, val logDirName: String) extends SparkListener
       case None => ""
     }
     val writeMetrics = taskMetrics.shuffleWriteMetrics match {
-      case Some(metrics) => " SHUFFLE_BYTES_WRITTEN=" + metrics.shuffleBytesWritten
+      case Some(metrics) =>
+        " SHUFFLE_BYTES_WRITTEN=" + metrics.shuffleBytesWritten +
+        " SHUFFLE_WRITE_TIME=" + metrics.shuffleWriteTime
       case None => ""
     }
-    stageLogInfo(stageId, status + info + executorRunTime + inputMetrics + shuffleReadMetrics +
-      writeMetrics)
+    stageLogInfo(stageId, status + info + executorRunTime + gcTime + inputMetrics +
+      shuffleReadMetrics + writeMetrics)
   }

   /**
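Since the stated use case is offline profiling, a sketch of how a script might consume these lines. The KEY=value field names match the patch; the example line and its values are invented:

```python
import re

# A hypothetical JobLogger line in the KEY=value format this patch extends.
line = ("TASK_ID=42 STAGE_ID=3 EXECUTOR_RUN_TIME=700 GC_TIME=35 "
        "SHUFFLE_BYTES_WRITTEN=1048576 SHUFFLE_WRITE_TIME=120000")

# Pull every KEY=value pair into a dict, the kind of parsing an offline
# profiling script would do over the per-stage log files.
metrics = dict(re.findall(r"(\w+)=(\S+)", line))
gc_fraction = int(metrics["GC_TIME"]) / int(metrics["EXECUTOR_RUN_TIME"])
print("GC overhead: {:.1%}".format(gc_fraction))  # GC overhead: 5.0%
```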
git commit: [EC2] Factor out Mesos spark-ec2 branch
Repository: spark
Updated Branches:
  refs/heads/master 76386e1a2 -> 2aca97c7c


[EC2] Factor out Mesos spark-ec2 branch

We reference a specific branch in two places. This patch makes it one place.

Author: Nicholas Chammas <nicholas.cham...@gmail.com>

Closes #3008 from nchammas/mesos-spark-ec2-branch and squashes the following commits:

10a6089 [Nicholas Chammas] factor out mess spark-ec2 branch


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2aca97c7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2aca97c7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2aca97c7

Branch: refs/heads/master
Commit: 2aca97c7cfdefea8b6f9dbb88951e9acdfd606d9
Parents: 76386e1
Author: Nicholas Chammas <nicholas.cham...@gmail.com>
Authored: Mon Nov 3 09:02:35 2014 -0800
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Mon Nov 3 09:02:35 2014 -0800

----------------------------------------------------------------------
 ec2/spark_ec2.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2aca97c7/ec2/spark_ec2.py
----------------------------------------------------------------------
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 0d6b82b..50f88f7 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -41,8 +41,9 @@ from boto import ec2

 DEFAULT_SPARK_VERSION = "1.1.0"
+MESOS_SPARK_EC2_BRANCH = "v4"
 # A URL prefix from which to fetch AMI information
-AMI_PREFIX = "https://raw.github.com/mesos/spark-ec2/v2/ami-list"
+AMI_PREFIX = "https://raw.github.com/mesos/spark-ec2/{b}/ami-list".format(b=MESOS_SPARK_EC2_BRANCH)


 class UsageError(Exception):
@@ -583,7 +584,13 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):

     # NOTE: We should clone the repository before running deploy_files to
     # prevent ec2-variables.sh from being overwritten
-    ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git -b v4")
+    ssh(
+        host=master,
+        opts=opts,
+        command="rm -rf spark-ec2"
+        + " && "
+        + "git clone https://github.com/mesos/spark-ec2.git -b {b}".format(b=MESOS_SPARK_EC2_BRANCH)
+    )

     print "Deploying files to master..."
     deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, modules)
git commit: [SPARK-4137] [EC2] Don't change working dir on user
Repository: spark
Updated Branches:
  refs/heads/master 3d2b5bc5b -> db45f5ad0


[SPARK-4137] [EC2] Don't change working dir on user

This issue was uncovered after [this discussion](https://issues.apache.org/jira/browse/SPARK-3398?focusedCommentId=14187471&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14187471).

Don't change the working directory on the user. This breaks relative paths the user may pass in, e.g., for the SSH identity file.

```
./ec2/spark-ec2 -i ../my.pem
```

This patch will preserve the user's current working directory and allow calls like the one above to work.

Author: Nicholas Chammas <nicholas.cham...@gmail.com>

Closes #2988 from nchammas/spark-ec2-cwd and squashes the following commits:

f3850b5 [Nicholas Chammas] pep8 fix
fbc20c7 [Nicholas Chammas] revert to old commenting style
752f958 [Nicholas Chammas] specify deploy.generic path absolutely
bcdf6a5 [Nicholas Chammas] fix typo
77871a2 [Nicholas Chammas] add clarifying comment
ce071fc [Nicholas Chammas] don't change working dir


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/db45f5ad
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/db45f5ad
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/db45f5ad

Branch: refs/heads/master
Commit: db45f5ad0368760dbeaa618a04f66ae9b2bed656
Parents: 3d2b5bc
Author: Nicholas Chammas <nicholas.cham...@gmail.com>
Authored: Wed Nov 5 20:45:35 2014 -0800
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Wed Nov 5 20:45:35 2014 -0800

----------------------------------------------------------------------
 ec2/spark-ec2    |  8 ++++++--
 ec2/spark_ec2.py | 12 +++++++++++-
 2 files changed, 17 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/db45f5ad/ec2/spark-ec2
----------------------------------------------------------------------
diff --git a/ec2/spark-ec2 b/ec2/spark-ec2
index 31f9771..4aa9082 100755
--- a/ec2/spark-ec2
+++ b/ec2/spark-ec2
@@ -18,5 +18,9 @@
 # limitations under the License.
 #

-cd `dirname $0`
-PYTHONPATH="./third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" python ./spark_ec2.py "$@"
+# Preserve the user's CWD so that relative paths are passed correctly to
+#+ the underlying Python script.
+SPARK_EC2_DIR="$(dirname $0)"
+
+PYTHONPATH="${SPARK_EC2_DIR}/third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" \
+    python "${SPARK_EC2_DIR}/spark_ec2.py" "$@"

http://git-wip-us.apache.org/repos/asf/spark/blob/db45f5ad/ec2/spark_ec2.py
----------------------------------------------------------------------
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 50f88f7..a5396c2 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -40,6 +40,7 @@ from boto.ec2.blockdevicemapping import BlockDeviceMapping, BlockDeviceType, EBS
 from boto import ec2

 DEFAULT_SPARK_VERSION = "1.1.0"
+SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__))

 MESOS_SPARK_EC2_BRANCH = "v4"
 # A URL prefix from which to fetch AMI information
@@ -593,7 +594,14 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
     )

     print "Deploying files to master..."
-    deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, modules)
+    deploy_files(
+        conn=conn,
+        root_dir=SPARK_EC2_DIR + "/" + "deploy.generic",
+        opts=opts,
+        master_nodes=master_nodes,
+        slave_nodes=slave_nodes,
+        modules=modules
+    )

     print "Running setup on master..."
     setup_spark_cluster(master, opts)
@@ -730,6 +738,8 @@ def get_num_disks(instance_type):
 # cluster (e.g. lists of masters and slaves). Files are only deployed to
 # the first master instance in the cluster, and we expect the setup
 # script to be run on that instance to copy them to other nodes.
+#
+# root_dir should be an absolute path to the directory with the files we want to deploy.
 def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
     active_master = master_nodes[0].public_dns_name
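The bug class being fixed is easy to reproduce in a few lines. A minimal sketch (names are illustrative, not spark-ec2's API): once a launcher chdir()s into its own directory, relative arguments like `-i ../my.pem` stop resolving from where the user actually ran the command, so user paths must be resolved against the saved CWD:

```python
import os

def resolve_user_path(path, user_cwd):
    # Resolve a user-supplied path against the directory the user launched
    # from, not against whatever the script's working directory is now.
    return os.path.normpath(os.path.join(user_cwd, path))

user_cwd = os.getcwd()  # capture before any chdir happens
os.chdir("/tmp")        # simulate the old `cd \`dirname $0\`` behavior
print(resolve_user_path("../my.pem", user_cwd))  # still correct
```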
git commit: [SPARK-4137] [EC2] Don't change working dir on user
Repository: spark
Updated Branches:
  refs/heads/branch-1.2 7e0da9f6b -> 70f6f36e0


[SPARK-4137] [EC2] Don't change working dir on user

This issue was uncovered after [this discussion](https://issues.apache.org/jira/browse/SPARK-3398?focusedCommentId=14187471&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14187471).

Don't change the working directory on the user. This breaks relative paths the user may pass in, e.g., for the SSH identity file.

```
./ec2/spark-ec2 -i ../my.pem
```

This patch will preserve the user's current working directory and allow calls like the one above to work.

Author: Nicholas Chammas <nicholas.cham...@gmail.com>

Closes #2988 from nchammas/spark-ec2-cwd and squashes the following commits:

f3850b5 [Nicholas Chammas] pep8 fix
fbc20c7 [Nicholas Chammas] revert to old commenting style
752f958 [Nicholas Chammas] specify deploy.generic path absolutely
bcdf6a5 [Nicholas Chammas] fix typo
77871a2 [Nicholas Chammas] add clarifying comment
ce071fc [Nicholas Chammas] don't change working dir

(cherry picked from commit db45f5ad0368760dbeaa618a04f66ae9b2bed656)
Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/70f6f36e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/70f6f36e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/70f6f36e

Branch: refs/heads/branch-1.2
Commit: 70f6f36e03f97847cd2f3e4fe2902bb8459ca6a3
Parents: 7e0da9f
Author: Nicholas Chammas <nicholas.cham...@gmail.com>
Authored: Wed Nov 5 20:45:35 2014 -0800
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Wed Nov 5 20:45:55 2014 -0800

----------------------------------------------------------------------
 ec2/spark-ec2    |  8 ++++++--
 ec2/spark_ec2.py | 12 +++++++++++-
 2 files changed, 17 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/70f6f36e/ec2/spark-ec2
----------------------------------------------------------------------
diff --git a/ec2/spark-ec2 b/ec2/spark-ec2
index 31f9771..4aa9082 100755
--- a/ec2/spark-ec2
+++ b/ec2/spark-ec2
@@ -18,5 +18,9 @@
 # limitations under the License.
 #

-cd `dirname $0`
-PYTHONPATH="./third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" python ./spark_ec2.py "$@"
+# Preserve the user's CWD so that relative paths are passed correctly to
+#+ the underlying Python script.
+SPARK_EC2_DIR="$(dirname $0)"
+
+PYTHONPATH="${SPARK_EC2_DIR}/third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" \
+    python "${SPARK_EC2_DIR}/spark_ec2.py" "$@"

http://git-wip-us.apache.org/repos/asf/spark/blob/70f6f36e/ec2/spark_ec2.py
----------------------------------------------------------------------
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 50f88f7..a5396c2 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -40,6 +40,7 @@ from boto.ec2.blockdevicemapping import BlockDeviceMapping, BlockDeviceType, EBS
 from boto import ec2

 DEFAULT_SPARK_VERSION = "1.1.0"
+SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__))

 MESOS_SPARK_EC2_BRANCH = "v4"
 # A URL prefix from which to fetch AMI information
@@ -593,7 +594,14 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
     )

     print "Deploying files to master..."
-    deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, modules)
+    deploy_files(
+        conn=conn,
+        root_dir=SPARK_EC2_DIR + "/" + "deploy.generic",
+        opts=opts,
+        master_nodes=master_nodes,
+        slave_nodes=slave_nodes,
+        modules=modules
+    )

     print "Running setup on master..."
     setup_spark_cluster(master, opts)
@@ -730,6 +738,8 @@ def get_num_disks(instance_type):
 # cluster (e.g. lists of masters and slaves). Files are only deployed to
 # the first master instance in the cluster, and we expect the setup
 # script to be run on that instance to copy them to other nodes.
+#
+# root_dir should be an absolute path to the directory with the files we want to deploy.
 def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
     active_master = master_nodes[0].public_dns_name
spark git commit: [Minor][SparkR] Minor refactor and removes redundancy related to cleanClosure.
Repository: spark
Updated Branches:
  refs/heads/master b45059d0d -> 0ba3fdd59


[Minor][SparkR] Minor refactor and removes redundancy related to cleanClosure.

1. Only use `cleanClosure` in creation of RRDDs. Normally, users and developers do not need to call `cleanClosure` in their function definitions.
2. Removes redundant code (e.g. unnecessary wrapper functions) related to `cleanClosure`.

Author: hlin09 <hlin0...@gmail.com>

Closes #5495 from hlin09/cleanClosureFix and squashes the following commits:

74ec303 [hlin09] Minor refactor and removes redundancy.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0ba3fdd5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0ba3fdd5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0ba3fdd5

Branch: refs/heads/master
Commit: 0ba3fdd5992cf09bd38303ebff34d2ed19e5e09b
Parents: b45059d
Author: hlin09 <hlin0...@gmail.com>
Authored: Mon Apr 13 20:43:24 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Mon Apr 13 20:43:24 2015 -0700

----------------------------------------------------------------------
 R/pkg/R/RDD.R     | 16 ++++------------
 R/pkg/R/pairRDD.R |  4 ----
 2 files changed, 4 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0ba3fdd5/R/pkg/R/RDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index d6a7500..820027e 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -85,7 +85,7 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)

   if (!inherits(prev, "PipelinedRDD") || !isPipelinable(prev)) {
     # This transformation is the first in its stage:
-    .Object@func <- func
+    .Object@func <- cleanClosure(func)
     .Object@prev_jrdd <- getJRDD(prev)
     .Object@env$prev_serializedMode <- prev@env$serializedMode
     # NOTE: We use prev_serializedMode to track the serialization mode of prev_JRDD
@@ -94,7 +94,7 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)
     pipelinedFunc <- function(split, iterator) {
       func(split, prev@func(split, iterator))
     }
-    .Object@func <- pipelinedFunc
+    .Object@func <- cleanClosure(pipelinedFunc)
     .Object@prev_jrdd <- prev@prev_jrdd # maintain the pipeline
     # Get the serialization mode of the parent RDD
     .Object@env$prev_serializedMode <- prev@env$prev_serializedMode
@@ -144,17 +144,13 @@ setMethod("getJRDD", signature(rdd = "PipelinedRDD"),
       return(rdd@env$jrdd_val)
     }

-    computeFunc <- function(split, part) {
-      rdd@func(split, part)
-    }
-
     packageNamesArr <- serialize(.sparkREnv[[".packages"]],
                                  connection = NULL)
     broadcastArr <- lapply(ls(.broadcastNames),
                            function(name) { get(name, .broadcastNames) })

-    serializedFuncArr <- serialize(computeFunc, connection = NULL)
+    serializedFuncArr <- serialize(rdd@func, connection = NULL)

     prev_jrdd <- rdd@prev_jrdd

@@ -551,11 +547,7 @@ setMethod("mapPartitions",
 setMethod("lapplyPartitionsWithIndex",
           signature(X = "RDD", FUN = "function"),
           function(X, FUN) {
-            FUN <- cleanClosure(FUN)
-            closureCapturingFunc <- function(split, part) {
-              FUN(split, part)
-            }
-            PipelinedRDD(X, closureCapturingFunc)
+            PipelinedRDD(X, FUN)
           })

 #' @rdname lapplyPartitionsWithIndex

http://git-wip-us.apache.org/repos/asf/spark/blob/0ba3fdd5/R/pkg/R/pairRDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R
index c2396c3..739d399 100644
--- a/R/pkg/R/pairRDD.R
+++ b/R/pkg/R/pairRDD.R
@@ -694,10 +694,6 @@ setMethod("cogroup",
             for (i in 1:rddsLen) {
               rdds[[i]] <- lapply(rdds[[i]],
                                   function(x) { list(x[[1]], list(i, x[[2]])) })
-              # TODO(hao): As issue [SparkR-142] mentions, the right value of i
-              # will not be captured into UDF if getJRDD is not invoked.
-              # It should be resolved together with that issue.
-              getJRDD(rdds[[i]])  # Capture the closure.
             }
             union.rdd <- Reduce(unionRDD, rdds)
             group.func <- function(vlist) {
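For readers unfamiliar with closure cleaning: before a user function can be serialized and shipped to workers, the runtime has to capture the enclosing values it references, and only those. A toy Python analogue of what such a cleaner inspects (an illustration, not SparkR's actual algorithm):

```python
def free_names(func):
    # The enclosing and global names the function body references are the
    # only values worth capturing into the serialized payload.
    code = func.__code__
    return set(code.co_freevars) | {n for n in code.co_names
                                    if n in func.__globals__}

LAMBDA = 0.01

def penalize(x):
    return x + LAMBDA  # references the module-level LAMBDA

print(free_names(penalize))  # {'LAMBDA'}
```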
spark git commit: [SPARK-6818] [SPARKR] Support column deletion in SparkR DataFrame API.
Repository: spark
Updated Branches:
  refs/heads/master 6220d933e -> 73db132bf


[SPARK-6818] [SPARKR] Support column deletion in SparkR DataFrame API.

Author: Sun Rui <rui@intel.com>

Closes #5655 from sun-rui/SPARK-6818 and squashes the following commits:

7c66570 [Sun Rui] [SPARK-6818][SPARKR] Support column deletion in SparkR DataFrame API.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/73db132b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/73db132b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/73db132b

Branch: refs/heads/master
Commit: 73db132bf503341c7a5cf9409351c282a8464175
Parents: 6220d93
Author: Sun Rui <rui@intel.com>
Authored: Thu Apr 23 16:08:14 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Thu Apr 23 16:08:14 2015 -0700

----------------------------------------------------------------------
 R/pkg/R/DataFrame.R              | 8 +++++++-
 R/pkg/inst/tests/test_sparkSQL.R | 5 +++++
 2 files changed, 12 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/73db132b/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 861fe1c..b59b700 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -790,9 +790,12 @@ setMethod("$", signature(x = "DataFrame"),

 setMethod("$<-", signature(x = "DataFrame"),
           function(x, name, value) {
-            stopifnot(class(value) == "Column")
+            stopifnot(class(value) == "Column" || is.null(value))
             cols <- columns(x)
             if (name %in% cols) {
+              if (is.null(value)) {
+                cols <- Filter(function(c) { c != name }, cols)
+              }
               cols <- lapply(cols, function(c) {
                 if (c == name) {
                   alias(value, name)
@@ -802,6 +805,9 @@ setMethod("$<-", signature(x = "DataFrame"),
               })
               nx <- select(x, cols)
             } else {
+              if (is.null(value)) {
+                return(x)
+              }
               nx <- withColumn(x, name, value)
             }
             x@sdf <- nx@sdf

http://git-wip-us.apache.org/repos/asf/spark/blob/73db132b/R/pkg/inst/tests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 25831ae..af7a6c5 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -449,6 +449,11 @@ test_that("select operators", {
   df$age2 <- df$age * 2
   expect_equal(columns(df), c("name", "age", "age2"))
   expect_equal(count(where(df, df$age2 == df$age * 2)), 2)
+
+  df$age2 <- NULL
+  expect_equal(columns(df), c("name", "age"))
+  df$age3 <- NULL
+  expect_equal(columns(df), c("name", "age"))
 })

 test_that("select with column", {
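The deletion strategy is simple: rebuild the column list without the dropped name, then re-select. A plain-Python sketch of just that list manipulation (the DataFrame machinery is elided):

```python
def drop_column(cols, name):
    # Deleting a column that does not exist is a no-op, matching the new
    # df$age3 <- NULL test above.
    return [c for c in cols if c != name]

cols = ["name", "age", "age2"]
cols = drop_column(cols, "age2")  # ['name', 'age']
cols = drop_column(cols, "age3")  # unchanged: ['name', 'age']
print(cols)
```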
spark git commit: [SPARK-7033] [SPARKR] Clean usage of split. Use partition instead where applicable.
Repository: spark
Updated Branches:
  refs/heads/master 6e57d57b3 -> ebb77b2af


[SPARK-7033] [SPARKR] Clean usage of split. Use partition instead where applicable.

Author: Sun Rui <rui@intel.com>

Closes #5628 from sun-rui/SPARK-7033 and squashes the following commits:

046bc9e [Sun Rui] Clean split usage in tests.
d531c86 [Sun Rui] [SPARK-7033][SPARKR] Clean usage of split. Use partition instead where applicable.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ebb77b2a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ebb77b2a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ebb77b2a

Branch: refs/heads/master
Commit: ebb77b2aff085e71906b5de9d266ded89051af82
Parents: 6e57d57
Author: Sun Rui <rui@intel.com>
Authored: Fri Apr 24 11:00:19 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Fri Apr 24 11:00:19 2015 -0700

----------------------------------------------------------------------
 R/pkg/R/RDD.R               | 36 ++++++++++++++++++------------------
 R/pkg/R/context.R           | 20 ++++++++++----------
 R/pkg/R/pairRDD.R           |  8 ++++----
 R/pkg/R/utils.R             |  2 +-
 R/pkg/inst/tests/test_rdd.R | 12 ++++++------
 5 files changed, 39 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ebb77b2a/R/pkg/R/RDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index 1284313..cc09efb 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -91,8 +91,8 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)
   # NOTE: We use prev_serializedMode to track the serialization mode of prev_JRDD
   # prev_serializedMode is used during the delayed computation of JRDD in getJRDD
 } else {
-  pipelinedFunc <- function(split, iterator) {
-    func(split, prev@func(split, iterator))
+  pipelinedFunc <- function(partIndex, part) {
+    func(partIndex, prev@func(partIndex, part))
   }
   .Object@func <- cleanClosure(pipelinedFunc)
   .Object@prev_jrdd <- prev@prev_jrdd # maintain the pipeline
@@ -306,7 +306,7 @@ setMethod("numPartitions",
           signature(x = "RDD"),
           function(x) {
             jrdd <- getJRDD(x)
-            partitions <- callJMethod(jrdd, "splits")
+            partitions <- callJMethod(jrdd, "partitions")
             callJMethod(partitions, "size")
           })

@@ -452,8 +452,8 @@ setMethod("countByValue",
 setMethod("lapply",
           signature(X = "RDD", FUN = "function"),
           function(X, FUN) {
-            func <- function(split, iterator) {
-              lapply(iterator, FUN)
+            func <- function(partIndex, part) {
+              lapply(part, FUN)
             }
             lapplyPartitionsWithIndex(X, func)
           })

@@ -538,8 +538,8 @@ setMethod("mapPartitions",
 #'\dontrun{
 #' sc <- sparkR.init()
 #' rdd <- parallelize(sc, 1:10, 5L)
-#' prod <- lapplyPartitionsWithIndex(rdd, function(split, part) {
-#'                                          split * Reduce("+", part) })
+#' prod <- lapplyPartitionsWithIndex(rdd, function(partIndex, part) {
+#'                                          partIndex * Reduce("+", part) })
 #' collect(prod, flatten = FALSE) # 0, 7, 22, 45, 76
 #'}
 #' @rdname lapplyPartitionsWithIndex
@@ -813,7 +813,7 @@ setMethod("distinct",
 #' @examples
 #'\dontrun{
 #' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10) # ensure each num is in its own split
+#' rdd <- parallelize(sc, 1:10)
 #' collect(sampleRDD(rdd, FALSE, 0.5, 1618L)) # ~5 distinct elements
 #' collect(sampleRDD(rdd, TRUE, 0.5, 9L)) # ~5 elements possibly with duplicates
 #'}
@@ -825,14 +825,14 @@ setMethod("sampleRDD",
           function(x, withReplacement, fraction, seed) {

             # The sampler: takes a partition and returns its sampled version.
-            samplingFunc <- function(split, part) {
+            samplingFunc <- function(partIndex, part) {
               set.seed(seed)
               res <- vector("list", length(part))
               len <- 0

               # Discards some random values to ensure each partition has a
               # different random seed.
-              runif(split)
+              runif(partIndex)

               for (elem in part) {
                 if (withReplacement) {
@@ -989,8 +989,8 @@ setMethod("coalesce",
           function(x, numPartitions, shuffle = FALSE) {
             numPartitions <- numToInt(numPartitions)
             if (shuffle || numPartitions > SparkR::numPartitions(x)) {
-              func <- function(s, part) {
-                set.seed(s)  # split as seed
+              func <- function(partIndex, part) {
+                set.seed(partIndex)  # partIndex as seed
                 start <- as.integer(sample(numPartitions, 1) - 1)
                 lapply(seq_along(part),
                        function(i
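The sampler's trick is worth spelling out: there is one global seed, but each partition must draw from a distinct random stream, which the R code achieves by seeding with `seed` and then discarding `partIndex` draws via `runif(partIndex)`. A Python sketch of the same goal, folding the partition index into the seed directly instead:

```python
import random

def sample_partition(part_index, part, fraction, seed):
    # One global seed, a distinct deterministic stream per partition.
    rng = random.Random(seed * 100003 + part_index)
    return [x for x in part if rng.random() < fraction]

parts = [list(range(1, 6)), list(range(6, 11))]
print([sample_partition(i, p, 0.5, seed=42) for i, p in enumerate(parts)])
```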
spark git commit: [SPARK-6852] [SPARKR] Accept numeric as numPartitions in SparkR.
Repository: spark
Updated Branches:
  refs/heads/master ebb77b2af -> caf0136ec


[SPARK-6852] [SPARKR] Accept numeric as numPartitions in SparkR.

Author: Sun Rui <rui@intel.com>

Closes #5613 from sun-rui/SPARK-6852 and squashes the following commits:

abaf02e [Sun Rui] Change the type of default numPartitions from integer to numeric in generics.R.
29d67c1 [Sun Rui] [SPARK-6852][SPARKR] Accept numeric as numPartitions in SparkR.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/caf0136e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/caf0136e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/caf0136e

Branch: refs/heads/master
Commit: caf0136ec5838cf5bf61f39a5b3474a505a6ae11
Parents: ebb77b2
Author: Sun Rui <rui@intel.com>
Authored: Fri Apr 24 12:52:07 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Fri Apr 24 12:52:07 2015 -0700

----------------------------------------------------------------------
 R/pkg/R/RDD.R      |  2 +-
 R/pkg/R/generics.R | 12 ++++++------
 R/pkg/R/pairRDD.R  | 24 ++++++++++++------------
 3 files changed, 19 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/caf0136e/R/pkg/R/RDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index cc09efb..1662d6b 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -967,7 +967,7 @@ setMethod("keyBy",
 setMethod("repartition",
           signature(x = "RDD", numPartitions = "numeric"),
           function(x, numPartitions) {
-            coalesce(x, numToInt(numPartitions), TRUE)
+            coalesce(x, numPartitions, TRUE)
           })

 #' Return a new RDD that is reduced into numPartitions partitions.

http://git-wip-us.apache.org/repos/asf/spark/blob/caf0136e/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 6c62333..34dbe84 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -60,7 +60,7 @@ setGeneric("countByValue", function(x) { standardGeneric("countByValue") })

 #' @rdname distinct
 #' @export
-setGeneric("distinct", function(x, numPartitions = 1L) { standardGeneric("distinct") })
+setGeneric("distinct", function(x, numPartitions = 1) { standardGeneric("distinct") })

 #' @rdname filterRDD
 #' @export
@@ -182,7 +182,7 @@ setGeneric("setName", function(x, name) { standardGeneric("setName") })
 #' @rdname sortBy
 #' @export
 setGeneric("sortBy",
-           function(x, func, ascending = TRUE, numPartitions = 1L) {
+           function(x, func, ascending = TRUE, numPartitions = 1) {
              standardGeneric("sortBy")
            })

@@ -244,7 +244,7 @@ setGeneric("flatMapValues", function(X, FUN) { standardGeneric("flatMapValues") })

 #' @rdname intersection
 #' @export
-setGeneric("intersection", function(x, other, numPartitions = 1L) {
+setGeneric("intersection", function(x, other, numPartitions = 1) {
   standardGeneric("intersection") })

 #' @rdname keys
@@ -346,21 +346,21 @@ setGeneric("rightOuterJoin", function(x, y, numPartitions) { standardGeneric("rightOuterJoin") })
 #' @rdname sortByKey
 #' @export
 setGeneric("sortByKey",
-           function(x, ascending = TRUE, numPartitions = 1L) {
+           function(x, ascending = TRUE, numPartitions = 1) {
              standardGeneric("sortByKey")
            })

 #' @rdname subtract
 #' @export
 setGeneric("subtract",
-           function(x, other, numPartitions = 1L) {
+           function(x, other, numPartitions = 1) {
              standardGeneric("subtract")
            })

 #' @rdname subtractByKey
 #' @export
 setGeneric("subtractByKey",
-           function(x, other, numPartitions = 1L) {
+           function(x, other, numPartitions = 1) {
              standardGeneric("subtractByKey")
            })

http://git-wip-us.apache.org/repos/asf/spark/blob/caf0136e/R/pkg/R/pairRDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R
index f99b474..9791e55 100644
--- a/R/pkg/R/pairRDD.R
+++ b/R/pkg/R/pairRDD.R
@@ -190,7 +190,7 @@ setMethod("flatMapValues",
 #' @rdname partitionBy
 #' @aliases partitionBy,RDD,integer-method
 setMethod("partitionBy",
-          signature(x = "RDD", numPartitions = "integer"),
+          signature(x = "RDD", numPartitions = "numeric"),
           function(x, numPartitions, partitionFunc = hashCode) {

             #if (missing(partitionFunc)) {
@@ -211,7 +211,7 @@ setMethod("partitionBy",
             # the content (key-val pairs).
             pairwiseRRDD <- newJObject("org.apache.spark.api.r.PairwiseRRDD",
                                        callJMethod(jrdd, "rdd"),
-                                       as.integer(numPartitions),
+                                       numToInt(numPartitions
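The change hinges on coercing a numeric like `2` (an R double) where an integer count is expected, via SparkR's `numToInt` helper in utils.R. A Python sketch of that coercion idea (the warning behavior here is an assumption for illustration):

```python
def num_to_int(x):
    # Accept any numeric where an integer partition count is expected,
    # truncating (with a note) when the value isn't integral.
    if not float(x).is_integer():
        print("Coercing %s to integer" % x)
    return int(x)

print(num_to_int(2.0))  # 2, silently
print(num_to_int(2.7))  # notes the truncation, then 2
```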
spark git commit: [SPARK-6856] [R] Make RDD information more useful in SparkR
Repository: spark
Updated Branches:
  refs/heads/master 998aac21f -> 7078f6028


[SPARK-6856] [R] Make RDD information more useful in SparkR

Author: Jeff Harrison <jeffrharri...@gmail.com>

Closes #5667 from His-name-is-Joof/joofspark and squashes the following commits:

f8814a6 [Jeff Harrison] newline added after RDD show() output
4d9d972 [Jeff Harrison] Merge branch 'master' into joofspark
9d2295e [Jeff Harrison] parallelize with 1:10
878b830 [Jeff Harrison] Merge branch 'master' into joofspark
c8c0b80 [Jeff Harrison] add test for RDD function show()
123be65 [Jeff Harrison] SPARK-6856


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7078f602
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7078f602
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7078f602

Branch: refs/heads/master
Commit: 7078f6028bf012235c664b02ec3541cbb0a248a7
Parents: 998aac2
Author: Jeff Harrison <jeffrharri...@gmail.com>
Authored: Mon Apr 27 13:38:25 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Mon Apr 27 13:38:25 2015 -0700

----------------------------------------------------------------------
 R/pkg/R/RDD.R               | 5 +++++
 R/pkg/inst/tests/test_rdd.R | 5 +++++
 2 files changed, 10 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/7078f602/R/pkg/R/RDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index 1662d6b..f90c26b 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -66,6 +66,11 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode,
   .Object
 })

+setMethod("show", "RDD",
+          function(.Object) {
+             cat(paste(callJMethod(.Object@jrdd, "toString"), "\n", sep=""))
+          })
+
 setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val) {
   .Object@env <- new.env()
   .Object@env$isCached <- FALSE

http://git-wip-us.apache.org/repos/asf/spark/blob/7078f602/R/pkg/inst/tests/test_rdd.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_rdd.R b/R/pkg/inst/tests/test_rdd.R
index d55af93..0320735 100644
--- a/R/pkg/inst/tests/test_rdd.R
+++ b/R/pkg/inst/tests/test_rdd.R
@@ -759,6 +759,11 @@ test_that("collectAsMap() on a pairwise RDD", {
   expect_equal(vals, list(`1` = "a", `2` = "b"))
 })

+test_that("show()", {
+  rdd <- parallelize(sc, list(1:10))
+  expect_output(show(rdd), "ParallelCollectionRDD\\[\\d+\\] at parallelize at RRDD\\.scala:\\d+")
+})
+
 test_that("sampleByKey() on pairwise RDDs", {
   rdd <- parallelize(sc, 1:2000)
   pairsRDD <- lapply(rdd, function(x) { if (x %% 2 == 0) list("a", x) else list("b", x) })
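The design choice here is delegation: rather than describing the R-side wrapper, `show` prints the backing JVM object's own `toString`, which carries the RDD lineage information. The same idea in Python terms (`jrdd` stands in for the JVM-side reference; an illustration, not SparkR's code):

```python
class RDD:
    def __init__(self, jrdd):
        self._jrdd = jrdd

    def __repr__(self):
        # Delegate display to the backing engine's own description
        # instead of printing a bare class name.
        return str(self._jrdd)

print(RDD("ParallelCollectionRDD[13] at parallelize at RRDD.scala:352"))
```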
spark git commit: [SPARK-6991] [SPARKR] Adds support for zipPartitions.
Repository: spark
Updated Branches:
  refs/heads/master ef82bddc1 -> ca9f4ebb8


[SPARK-6991] [SPARKR] Adds support for zipPartitions.

Author: hlin09 <hlin0...@gmail.com>

Closes #5568 from hlin09/zipPartitions and squashes the following commits:

12c08a5 [hlin09] Fix comments
d2d32db [hlin09] Merge branch 'master' into zipPartitions
ec56d2f [hlin09] Fix test.
27655d3 [hlin09] Adds support for zipPartitions.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca9f4ebb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca9f4ebb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca9f4ebb

Branch: refs/heads/master
Commit: ca9f4ebb8e510e521bf4df0331375ddb385fb9d2
Parents: ef82bdd
Author: hlin09 <hlin0...@gmail.com>
Authored: Mon Apr 27 15:04:37 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Mon Apr 27 15:04:37 2015 -0700

----------------------------------------------------------------------
 R/pkg/NAMESPACE                         |  1 +
 R/pkg/R/RDD.R                           | 46 ++++++++++++++++++++++++++++
 R/pkg/R/generics.R                      |  5 +++
 R/pkg/inst/tests/test_binary_function.R | 33 +++++++++++++++++++
 4 files changed, 85 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ca9f4ebb/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 8028364..e077eac 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -71,6 +71,7 @@ exportMethods(
               "unpersist",
               "value",
               "values",
+              "zipPartitions",
               "zipRDD",
               "zipWithIndex",
               "zipWithUniqueId"

http://git-wip-us.apache.org/repos/asf/spark/blob/ca9f4ebb/R/pkg/R/RDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index f90c26b..a3a0421 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -1595,3 +1595,49 @@ setMethod("intersection",
             keys(filterRDD(cogroup(rdd1, rdd2, numPartitions = numPartitions), filterFunction))
           })
+
+#' Zips an RDD's partitions with one (or more) RDD(s).
+#' Same as zipPartitions in Spark.
+#'
+#' @param ... RDDs to be zipped.
+#' @param func A function to transform zipped partitions.
+#' @return A new RDD by applying a function to the zipped partitions.
+#'         Assumes that all the RDDs have the *same number of partitions*, but
+#'         does *not* require them to have the same number of elements in each partition.
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' rdd1 <- parallelize(sc, 1:2, 2L)  # 1, 2
+#' rdd2 <- parallelize(sc, 1:4, 2L)  # 1:2, 3:4
+#' rdd3 <- parallelize(sc, 1:6, 2L)  # 1:3, 4:6
+#' collect(zipPartitions(rdd1, rdd2, rdd3,
+#'                       func = function(x, y, z) { list(list(x, y, z))} ))
+#' # list(list(1, c(1,2), c(1,2,3)), list(2, c(3,4), c(4,5,6)))
+#'}
+#' @rdname zipRDD
+#' @aliases zipPartitions,RDD
+setMethod("zipPartitions",
+          "RDD",
+          function(..., func) {
+            rrdds <- list(...)
+            if (length(rrdds) == 1) {
+              return(rrdds[[1]])
+            }
+            nPart <- sapply(rrdds, numPartitions)
+            if (length(unique(nPart)) != 1) {
+              stop("Can only zipPartitions RDDs which have the same number of partitions.")
+            }
+
+            rrdds <- lapply(rrdds, function(rdd) {
+              mapPartitionsWithIndex(rdd, function(partIndex, part) {
+                print(length(part))
+                list(list(partIndex, part))
+              })
+            })
+            union.rdd <- Reduce(unionRDD, rrdds)
+            zipped.rdd <- values(groupByKey(union.rdd, numPartitions = nPart[1]))
+            res <- mapPartitions(zipped.rdd, function(plist) {
+              do.call(func, plist[[1]])
+            })
+            res
+          })

http://git-wip-us.apache.org/repos/asf/spark/blob/ca9f4ebb/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 34dbe84..e887293 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -217,6 +217,11 @@ setGeneric("unpersist", function(x, ...) { standardGeneric("unpersist") })
 #' @export
 setGeneric("zipRDD", function(x, other) { standardGeneric("zipRDD") })

+#' @rdname zipRDD
+#' @export
+setGeneric("zipPartitions", function(..., func) { standardGeneric("zipPartitions") },
+           signature = "...")
+
 #' @rdname zipWithIndex
 #' @seealso zipWithUniqueId
 #' @export

http://git-wip-us.apache.org/repos/asf/spark/blob/ca9f4ebb/R/pkg/inst/tests/test_binary_function.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/test_binary_function.R b/R/pkg/inst/tests
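The implementation strategy is the interesting part: tag each partition with its index, union all the tagged RDDs, group by partition index, then apply `func` to each co-grouped set of partitions. A Python sketch of that same strategy, with lists of lists standing in for RDDs:

```python
from collections import defaultdict

def zip_partitions(func, *rdds):
    if len({len(r) for r in rdds}) != 1:
        raise ValueError("Can only zipPartitions RDDs which have the same "
                         "number of partitions.")
    grouped = defaultdict(list)
    for rdd in rdds:                      # the unionRDD step
        for idx, part in enumerate(rdd):  # the (partIndex, part) tagging
            grouped[idx].append(part)     # the groupByKey step
    return [func(*grouped[i]) for i in sorted(grouped)]

rdd1, rdd2, rdd3 = [[1], [2]], [[1, 2], [3, 4]], [[1, 2, 3], [4, 5, 6]]
print(zip_partitions(lambda x, y, z: (x, y, z), rdd1, rdd2, rdd3))
# [([1], [1, 2], [1, 2, 3]), ([2], [3, 4], [4, 5, 6])]
```

Note that partitions only need to line up in count, not in length, exactly as the roxygen docs above state.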
[2/2] spark git commit: [SPARK-6824] Fill the docs for DataFrame API in SparkR
[SPARK-6824] Fill the docs for DataFrame API in SparkR

This patch also removes the RDD docs from being built as a part of roxygen, simply by deleting the ' of the "#'" comment markers.

Author: hqzizania <qian.hu...@intel.com>
Author: qhuang <qian.hu...@intel.com>

Closes #5969 from hqzizania/R1 and squashes the following commits:

6d27696 [qhuang] fixes in NAMESPACE
eb4b095 [qhuang] remove more docs
6394579 [qhuang] remove RDD docs in generics.R
6813860 [hqzizania] Fill the docs for DataFrame API in SparkR
857220f [hqzizania] remove the pairRDD docs from being built as a part of roxygen
c045d64 [hqzizania] remove the RDD docs from being built as a part of roxygen


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/008a60dd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/008a60dd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/008a60dd

Branch: refs/heads/master
Commit: 008a60dd371e76819d8e08ab638cac7b3a48c9fc
Parents: 65afd3c
Author: hqzizania <qian.hu...@intel.com>
Authored: Fri May 8 11:25:04 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Fri May 8 11:25:04 2015 -0700

----------------------------------------------------------------------
 R/pkg/DESCRIPTION    |    2 +-
 R/pkg/NAMESPACE      |    4 -
 R/pkg/R/DataFrame.R  |   95 +-
 R/pkg/R/RDD.R        | 1546 ++---
 R/pkg/R/SQLContext.R |   64 +-
 R/pkg/R/broadcast.R  |   64 +-
 R/pkg/R/context.R    |  240 +++
 R/pkg/R/generics.R   |  318 +-
 R/pkg/R/pairRDD.R    |  886 +-
 9 files changed, 1610 insertions(+), 1609 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/008a60dd/R/pkg/DESCRIPTION
----------------------------------------------------------------------
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 1c1779a..efc85bb 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -15,11 +15,11 @@ Suggests:
 Description: R frontend for Spark
 License: Apache License (== 2.0)
 Collate:
+    'schema.R'
     'generics.R'
     'jobj.R'
     'RDD.R'
     'pairRDD.R'
-    'schema.R'
     'column.R'
     'group.R'
     'DataFrame.R'

http://git-wip-us.apache.org/repos/asf/spark/blob/008a60dd/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 3fb92be..7611f47 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -26,7 +26,6 @@ exportMethods("cache",
               "intersect",
               "isLocal",
               "join",
-              "length",
               "limit",
               "orderBy",
               "names",
@@ -101,9 +100,6 @@ export("cacheTable",
        "tables",
        "uncacheTable")

-export("sparkRSQL.init",
-       "sparkRHive.init")
-
 export("structField",
        "structField.jobj",
        "structField.character",

http://git-wip-us.apache.org/repos/asf/spark/blob/008a60dd/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 47d92f1..354642e 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -45,6 +45,9 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {

 #' @rdname DataFrame
 #' @export
+#'
+#' @param sdf A Java object reference to the backing Scala DataFrame
+#' @param isCached TRUE if the dataFrame is cached
 dataFrame <- function(sdf, isCached = FALSE) {
   new("DataFrame", sdf, isCached)
 }
@@ -244,7 +247,7 @@ setMethod("columns",
           })

 #' @rdname columns
-#' @export
+#' @aliases names,DataFrame,function-method
 setMethod("names",
           signature(x = "DataFrame"),
           function(x) {
@@ -399,23 +402,23 @@ setMethod("repartition",
             dataFrame(sdf)
           })

-#' toJSON
-#'
-#' Convert the rows of a DataFrame into JSON objects and return an RDD where
-#' each element contains a JSON string.
-#'
-#' @param x A SparkSQL DataFrame
-#' @return A StringRRDD of JSON objects
-#' @rdname tojson
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlCtx, path)
-#' newRDD <- toJSON(df)
-#'}
+# toJSON
+#
+# Convert the rows of a DataFrame into JSON objects and return an RDD where
+# each element contains a JSON string.
+#
+# @param x A SparkSQL DataFrame
+# @return A StringRRDD of JSON objects
+# @rdname tojson
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# path <- "path/to/file.json"
+# df <- jsonFile(sqlCtx, path)
+# newRDD <- toJSON(df)
+#}
 setMethod("toJSON",
           signature(x = "DataFrame"),
           function(x) {
@@ -578,8 +581,8 @@ setMethod("limit",
             dataFrame(res)
           })

-# Take the first NUM rows of a DataFrame and return a the results as a data.frame
-
+#' Take the first NUM rows of a DataFrame
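The mechanical edit behind this patch is easy to automate: demote roxygen doc comments ("#'") to plain comments ("#") so roxygen2 no longer picks them up when building the package documentation. A small Python sketch of that transformation:

```python
def demote_roxygen(line):
    # Only the roxygen marker at the start of a comment line is demoted;
    # ordinary code and trailing comments are left alone.
    stripped = line.lstrip()
    if stripped.startswith("#'"):
        return line.replace("#'", "#", 1)
    return line

print(demote_roxygen("#' @rdname aggregateRDD"))  # "# @rdname aggregateRDD"
print(demote_roxygen("x <- 1  # keep me"))        # unchanged
```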
[1/2] spark git commit: [SPARK-6824] Fill the docs for DataFrame API in SparkR
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 75fed0ca4 -> 4f01f5b56


http://git-wip-us.apache.org/repos/asf/spark/blob/4f01f5b5/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 5838955..380e8eb 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -17,353 +17,353 @@

 ############ RDD Actions and Transformations ############

-#' @rdname aggregateRDD
-#' @seealso reduce
-#' @export
+# @rdname aggregateRDD
+# @seealso reduce
+# @export
 setGeneric("aggregateRDD", function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") })

-#' @rdname cache-methods
-#' @export
+# @rdname cache-methods
+# @export
 setGeneric("cache", function(x) { standardGeneric("cache") })

-#' @rdname coalesce
-#' @seealso repartition
-#' @export
+# @rdname coalesce
+# @seealso repartition
+# @export
 setGeneric("coalesce", function(x, numPartitions, ...) { standardGeneric("coalesce") })

-#' @rdname checkpoint-methods
-#' @export
+# @rdname checkpoint-methods
+# @export
 setGeneric("checkpoint", function(x) { standardGeneric("checkpoint") })

-#' @rdname collect-methods
-#' @export
+# @rdname collect-methods
+# @export
 setGeneric("collect", function(x, ...) { standardGeneric("collect") })

-#' @rdname collect-methods
-#' @export
+# @rdname collect-methods
+# @export
 setGeneric("collectAsMap", function(x) { standardGeneric("collectAsMap") })

-#' @rdname collect-methods
-#' @export
+# @rdname collect-methods
+# @export
 setGeneric("collectPartition",
            function(x, partitionId) {
              standardGeneric("collectPartition")
            })

-#' @rdname count
-#' @export
+# @rdname count
+# @export
 setGeneric("count", function(x) { standardGeneric("count") })

-#' @rdname countByValue
-#' @export
+# @rdname countByValue
+# @export
 setGeneric("countByValue", function(x) { standardGeneric("countByValue") })

-#' @rdname distinct
-#' @export
+# @rdname distinct
+# @export
 setGeneric("distinct", function(x, numPartitions = 1) { standardGeneric("distinct") })

-#' @rdname filterRDD
-#' @export
+# @rdname filterRDD
+# @export
 setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") })

-#' @rdname first
-#' @export
+# @rdname first
+# @export
 setGeneric("first", function(x) { standardGeneric("first") })

-#' @rdname flatMap
-#' @export
+# @rdname flatMap
+# @export
 setGeneric("flatMap", function(X, FUN) { standardGeneric("flatMap") })

-#' @rdname fold
-#' @seealso reduce
-#' @export
+# @rdname fold
+# @seealso reduce
+# @export
 setGeneric("fold", function(x, zeroValue, op) { standardGeneric("fold") })

-#' @rdname foreach
-#' @export
+# @rdname foreach
+# @export
 setGeneric("foreach", function(x, func) { standardGeneric("foreach") })

-#' @rdname foreach
-#' @export
+# @rdname foreach
+# @export
 setGeneric("foreachPartition", function(x, func) { standardGeneric("foreachPartition") })

 # The jrdd accessor function.
 setGeneric("getJRDD", function(rdd, ...) { standardGeneric("getJRDD") })

-#' @rdname glom
-#' @export
+# @rdname glom
+# @export
 setGeneric("glom", function(x) { standardGeneric("glom") })

-#' @rdname keyBy
-#' @export
+# @rdname keyBy
+# @export
 setGeneric("keyBy", function(x, func) { standardGeneric("keyBy") })

-#' @rdname lapplyPartition
-#' @export
+# @rdname lapplyPartition
+# @export
 setGeneric("lapplyPartition", function(X, FUN) { standardGeneric("lapplyPartition") })

-#' @rdname lapplyPartitionsWithIndex
-#' @export
+# @rdname lapplyPartitionsWithIndex
+# @export
 setGeneric("lapplyPartitionsWithIndex",
            function(X, FUN) {
              standardGeneric("lapplyPartitionsWithIndex")
            })

-#' @rdname lapply
-#' @export
+# @rdname lapply
+# @export
 setGeneric("map", function(X, FUN) { standardGeneric("map") })

-#' @rdname lapplyPartition
-#' @export
+# @rdname lapplyPartition
+# @export
 setGeneric("mapPartitions", function(X, FUN) { standardGeneric("mapPartitions") })

-#' @rdname lapplyPartitionsWithIndex
-#' @export
+# @rdname lapplyPartitionsWithIndex
+# @export
 setGeneric("mapPartitionsWithIndex",
            function(X, FUN) {
              standardGeneric("mapPartitionsWithIndex")
            })

-#' @rdname maximum
-#' @export
+# @rdname maximum
+# @export
 setGeneric("maximum", function(x) { standardGeneric("maximum") })

-#' @rdname minimum
-#' @export
+# @rdname minimum
+# @export
 setGeneric("minimum", function(x) { standardGeneric("minimum") })

-#' @rdname sumRDD
-#' @export
+# @rdname sumRDD
+# @export
 setGeneric("sumRDD", function(x) { standardGeneric("sumRDD") })

-#' @rdname name
-#' @export
+# @rdname name
+# @export
 setGeneric("name", function(x) { standardGeneric("name") })

-#' @rdname numPartitions
-#' @export
+# @rdname numPartitions
+# @export
 setGeneric("numPartitions", function(x) { standardGeneric("numPartitions") })

-#' @rdname persist
-#' @export
+# @rdname persist
+# @export
 setGeneric("persist", function(x, newLevel) { standardGeneric("persist") })

-#' @rdname pipeRDD
-#' @export
+# @rdname
[1/2] spark git commit: [SPARK-6824] Fill the docs for DataFrame API in SparkR
Repository: spark
Updated Branches:
  refs/heads/master 65afd3ce8 -> 008a60dd3


http://git-wip-us.apache.org/repos/asf/spark/blob/008a60dd/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 5838955..380e8eb 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -17,353 +17,353 @@

 ############ RDD Actions and Transformations ############

-#' @rdname aggregateRDD
-#' @seealso reduce
-#' @export
+# @rdname aggregateRDD
+# @seealso reduce
+# @export
 setGeneric("aggregateRDD", function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") })

-#' @rdname cache-methods
-#' @export
+# @rdname cache-methods
+# @export
 setGeneric("cache", function(x) { standardGeneric("cache") })

-#' @rdname coalesce
-#' @seealso repartition
-#' @export
+# @rdname coalesce
+# @seealso repartition
+# @export
 setGeneric("coalesce", function(x, numPartitions, ...) { standardGeneric("coalesce") })

-#' @rdname checkpoint-methods
-#' @export
+# @rdname checkpoint-methods
+# @export
 setGeneric("checkpoint", function(x) { standardGeneric("checkpoint") })

-#' @rdname collect-methods
-#' @export
+# @rdname collect-methods
+# @export
 setGeneric("collect", function(x, ...) { standardGeneric("collect") })

-#' @rdname collect-methods
-#' @export
+# @rdname collect-methods
+# @export
 setGeneric("collectAsMap", function(x) { standardGeneric("collectAsMap") })

-#' @rdname collect-methods
-#' @export
+# @rdname collect-methods
+# @export
 setGeneric("collectPartition",
            function(x, partitionId) {
              standardGeneric("collectPartition")
            })

-#' @rdname count
-#' @export
+# @rdname count
+# @export
 setGeneric("count", function(x) { standardGeneric("count") })

-#' @rdname countByValue
-#' @export
+# @rdname countByValue
+# @export
 setGeneric("countByValue", function(x) { standardGeneric("countByValue") })

-#' @rdname distinct
-#' @export
+# @rdname distinct
+# @export
 setGeneric("distinct", function(x, numPartitions = 1) { standardGeneric("distinct") })

-#' @rdname filterRDD
-#' @export
+# @rdname filterRDD
+# @export
 setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") })

-#' @rdname first
-#' @export
+# @rdname first
+# @export
 setGeneric("first", function(x) { standardGeneric("first") })

-#' @rdname flatMap
-#' @export
+# @rdname flatMap
+# @export
 setGeneric("flatMap", function(X, FUN) { standardGeneric("flatMap") })

-#' @rdname fold
-#' @seealso reduce
-#' @export
+# @rdname fold
+# @seealso reduce
+# @export
 setGeneric("fold", function(x, zeroValue, op) { standardGeneric("fold") })

-#' @rdname foreach
-#' @export
+# @rdname foreach
+# @export
 setGeneric("foreach", function(x, func) { standardGeneric("foreach") })

-#' @rdname foreach
-#' @export
+# @rdname foreach
+# @export
 setGeneric("foreachPartition", function(x, func) { standardGeneric("foreachPartition") })

 # The jrdd accessor function.
 setGeneric("getJRDD", function(rdd, ...) { standardGeneric("getJRDD") })

-#' @rdname glom
-#' @export
+# @rdname glom
+# @export
 setGeneric("glom", function(x) { standardGeneric("glom") })

-#' @rdname keyBy
-#' @export
+# @rdname keyBy
+# @export
 setGeneric("keyBy", function(x, func) { standardGeneric("keyBy") })

-#' @rdname lapplyPartition
-#' @export
+# @rdname lapplyPartition
+# @export
 setGeneric("lapplyPartition", function(X, FUN) { standardGeneric("lapplyPartition") })

-#' @rdname lapplyPartitionsWithIndex
-#' @export
+# @rdname lapplyPartitionsWithIndex
+# @export
 setGeneric("lapplyPartitionsWithIndex",
            function(X, FUN) {
              standardGeneric("lapplyPartitionsWithIndex")
            })

-#' @rdname lapply
-#' @export
+# @rdname lapply
+# @export
 setGeneric("map", function(X, FUN) { standardGeneric("map") })

-#' @rdname lapplyPartition
-#' @export
+# @rdname lapplyPartition
+# @export
 setGeneric("mapPartitions", function(X, FUN) { standardGeneric("mapPartitions") })

-#' @rdname lapplyPartitionsWithIndex
-#' @export
+# @rdname lapplyPartitionsWithIndex
+# @export
 setGeneric("mapPartitionsWithIndex",
            function(X, FUN) {
              standardGeneric("mapPartitionsWithIndex")
            })

-#' @rdname maximum
-#' @export
+# @rdname maximum
+# @export
 setGeneric("maximum", function(x) { standardGeneric("maximum") })

-#' @rdname minimum
-#' @export
+# @rdname minimum
+# @export
 setGeneric("minimum", function(x) { standardGeneric("minimum") })

-#' @rdname sumRDD
-#' @export
+# @rdname sumRDD
+# @export
 setGeneric("sumRDD", function(x) { standardGeneric("sumRDD") })

-#' @rdname name
-#' @export
+# @rdname name
+# @export
 setGeneric("name", function(x) { standardGeneric("name") })

-#' @rdname numPartitions
-#' @export
+# @rdname numPartitions
+# @export
 setGeneric("numPartitions", function(x) { standardGeneric("numPartitions") })

-#' @rdname persist
-#' @export
+# @rdname persist
+# @export
 setGeneric("persist", function(x, newLevel) { standardGeneric("persist") })

-#' @rdname pipeRDD
-#' @export
+# @rdname
[2/2] spark git commit: [SPARK-6824] Fill the docs for DataFrame API in SparkR
[SPARK-6824] Fill the docs for DataFrame API in SparkR

This patch also removes the RDD docs from being built as a part of roxygen, simply by deleting the ' of the "#'" comment markers.

Author: hqzizania <qian.hu...@intel.com>
Author: qhuang <qian.hu...@intel.com>

Closes #5969 from hqzizania/R1 and squashes the following commits:

6d27696 [qhuang] fixes in NAMESPACE
eb4b095 [qhuang] remove more docs
6394579 [qhuang] remove RDD docs in generics.R
6813860 [hqzizania] Fill the docs for DataFrame API in SparkR
857220f [hqzizania] remove the pairRDD docs from being built as a part of roxygen
c045d64 [hqzizania] remove the RDD docs from being built as a part of roxygen

(cherry picked from commit 008a60dd371e76819d8e08ab638cac7b3a48c9fc)
Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f01f5b5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f01f5b5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f01f5b5

Branch: refs/heads/branch-1.4
Commit: 4f01f5b563819e2ce7d3ac7ea86162b4e76935a3
Parents: 75fed0c
Author: hqzizania <qian.hu...@intel.com>
Authored: Fri May 8 11:25:04 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Fri May 8 11:25:20 2015 -0700

----------------------------------------------------------------------
 R/pkg/DESCRIPTION    |    2 +-
 R/pkg/NAMESPACE      |    4 -
 R/pkg/R/DataFrame.R  |   95 +-
 R/pkg/R/RDD.R        | 1546 ++---
 R/pkg/R/SQLContext.R |   64 +-
 R/pkg/R/broadcast.R  |   64 +-
 R/pkg/R/context.R    |  240 +++
 R/pkg/R/generics.R   |  318 +-
 R/pkg/R/pairRDD.R    |  886 +-
 9 files changed, 1610 insertions(+), 1609 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4f01f5b5/R/pkg/DESCRIPTION
----------------------------------------------------------------------
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 1c1779a..efc85bb 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -15,11 +15,11 @@ Suggests:
 Description: R frontend for Spark
 License: Apache License (== 2.0)
 Collate:
+    'schema.R'
     'generics.R'
     'jobj.R'
     'RDD.R'
     'pairRDD.R'
-    'schema.R'
     'column.R'
     'group.R'
     'DataFrame.R'

http://git-wip-us.apache.org/repos/asf/spark/blob/4f01f5b5/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 3fb92be..7611f47 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -26,7 +26,6 @@ exportMethods("cache",
               "intersect",
               "isLocal",
               "join",
-              "length",
               "limit",
               "orderBy",
               "names",
@@ -101,9 +100,6 @@ export("cacheTable",
        "tables",
        "uncacheTable")

-export("sparkRSQL.init",
-       "sparkRHive.init")
-
 export("structField",
        "structField.jobj",
        "structField.character",

http://git-wip-us.apache.org/repos/asf/spark/blob/4f01f5b5/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 47d92f1..354642e 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -45,6 +45,9 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {

 #' @rdname DataFrame
 #' @export
+#'
+#' @param sdf A Java object reference to the backing Scala DataFrame
+#' @param isCached TRUE if the dataFrame is cached
 dataFrame <- function(sdf, isCached = FALSE) {
   new("DataFrame", sdf, isCached)
 }
@@ -244,7 +247,7 @@ setMethod("columns",
           })

 #' @rdname columns
-#' @export
+#' @aliases names,DataFrame,function-method
 setMethod("names",
           signature(x = "DataFrame"),
           function(x) {
@@ -399,23 +402,23 @@ setMethod("repartition",
             dataFrame(sdf)
           })

-#' toJSON
-#'
-#' Convert the rows of a DataFrame into JSON objects and return an RDD where
-#' each element contains a JSON string.
-#'
-#' @param x A SparkSQL DataFrame
-#' @return A StringRRDD of JSON objects
-#' @rdname tojson
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlCtx, path)
-#' newRDD <- toJSON(df)
-#'}
+# toJSON
+#
+# Convert the rows of a DataFrame into JSON objects and return an RDD where
+# each element contains a JSON string.
+#
+# @param x A SparkSQL DataFrame
+# @return A StringRRDD of JSON objects
+# @rdname tojson
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# path <- "path/to/file.json"
+# df <- jsonFile(sqlCtx, path)
+# newRDD <- toJSON(df)
+#}
 setMethod("toJSON",
           signature(x = "DataFrame"),
           function(x) {
@@ -578,8 +581,8 @@ setMethod("limit",
             dataFrame(res
spark git commit: updated ec2 instance types
Repository: spark Updated Branches: refs/heads/master 35c9599b9 - 1c78f6866 updated ec2 instance types I needed to run some d2 instances, so I updated the spark_ec2.py accordingly Author: Brendan Collins bcoll...@blueraster.com Closes #6014 from brendancol/ec2-instance-types-update and squashes the following commits: d7b4191 [Brendan Collins] Merge branch 'ec2-instance-types-update' of github.com:brendancol/spark into ec2-instance-types-update 6366c45 [Brendan Collins] added back cc1.4xlarge fc2931f [Brendan Collins] updated ec2 instance types 80c2aa6 [Brendan Collins] vertically aligned whitespace 85c6236 [Brendan Collins] vertically aligned whitespace 1657c26 [Brendan Collins] updated ec2 instance types Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1c78f686 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1c78f686 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1c78f686 Branch: refs/heads/master Commit: 1c78f6866ebbcfb41d9875bfa3c0b9fa23b188bf Parents: 35c9599 Author: Brendan Collins bcoll...@blueraster.com Authored: Fri May 8 15:59:34 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri May 8 15:59:34 2015 -0700 -- ec2/spark_ec2.py | 70 ++- 1 file changed, 47 insertions(+), 23 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1c78f686/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 87c0818..ab4a96f 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -347,46 +347,57 @@ def get_validate_spark_version(version, repo): # Source: http://aws.amazon.com/amazon-linux-ami/instance-type-matrix/ -# Last Updated: 2014-06-20 +# Last Updated: 2015-05-08 # For easy maintainability, please keep this manually-inputted dictionary sorted by key. EC2_INSTANCE_TYPES = { c1.medium: pvm, c1.xlarge: pvm, +c3.large:pvm, +c3.xlarge: pvm, c3.2xlarge: pvm, c3.4xlarge: pvm, c3.8xlarge: pvm, -c3.large:pvm, -c3.xlarge: pvm, +c4.large:hvm, +c4.xlarge: hvm, +c4.2xlarge: hvm, +c4.4xlarge: hvm, +c4.8xlarge: hvm, cc1.4xlarge: hvm, cc2.8xlarge: hvm, cg1.4xlarge: hvm, cr1.8xlarge: hvm, +d2.xlarge: hvm, +d2.2xlarge: hvm, +d2.4xlarge: hvm, +d2.8xlarge: hvm, +g2.2xlarge: hvm, +g2.8xlarge: hvm, hi1.4xlarge: pvm, hs1.8xlarge: pvm, +i2.xlarge: hvm, i2.2xlarge: hvm, i2.4xlarge: hvm, i2.8xlarge: hvm, -i2.xlarge: hvm, -m1.large:pvm, -m1.medium: pvm, m1.small:pvm, +m1.medium: pvm, +m1.large:pvm, m1.xlarge: pvm, +m2.xlarge: pvm, m2.2xlarge: pvm, m2.4xlarge: pvm, -m2.xlarge: pvm, -m3.2xlarge: hvm, -m3.large:hvm, m3.medium: hvm, +m3.large:hvm, m3.xlarge: hvm, +m3.2xlarge: hvm, +r3.large:hvm, +r3.xlarge: hvm, r3.2xlarge: hvm, r3.4xlarge: hvm, r3.8xlarge: hvm, -r3.large:hvm, -r3.xlarge: hvm, t1.micro:pvm, -t2.medium: hvm, t2.micro:hvm, t2.small:hvm, +t2.medium: hvm, } @@ -878,44 +889,57 @@ def wait_for_cluster_state(conn, opts, cluster_instances, cluster_state): # Get number of local disks available for a given EC2 instance type. def get_num_disks(instance_type): # Source: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html -# Last Updated: 2014-06-20 +# Last Updated: 2015-05-08 # For easy maintainability, please keep this manually-inputted dictionary sorted by key. 
disks_by_instance = { c1.medium: 1, c1.xlarge: 4, +c3.large:2, +c3.xlarge: 2, c3.2xlarge: 2, c3.4xlarge: 2, c3.8xlarge: 2, -c3.large:2, -c3.xlarge: 2, +c4.large:0, +c4.xlarge: 0, +c4.2xlarge: 0, +c4.4xlarge: 0, +c4.8xlarge: 0, cc1.4xlarge: 2, cc2.8xlarge: 4, cg1.4xlarge: 2, cr1.8xlarge: 2, +d2.xlarge: 3, +d2.2xlarge: 6, +d2.4xlarge: 12, +d2.8xlarge: 24, g2.2xlarge: 1, +g2.8xlarge: 2, hi1.4xlarge: 2, hs1.8xlarge: 24, +i2.xlarge: 1, i2.2xlarge: 2, i2.4xlarge: 4, i2.8xlarge: 8, -i2.xlarge: 1, -m1.large:2, -m1.medium: 1, m1.small:1, +m1.medium: 1, +m1.large:2, m1.xlarge: 4, +m2.xlarge: 1, m2.2xlarge: 1, m2.4xlarge: 2, -m2.xlarge: 1
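Both tables the patch touches are plain lookup maps keyed by instance type. An illustrative R sketch of the lookup-with-fallback pattern such a table supports (the excerpted table and the fallback behavior below are assumptions for illustration, not the script's verbatim logic):

    # A small excerpt of the instance-type -> local-disk-count table.
    disks_by_instance <- c("d2.xlarge" = 3, "d2.2xlarge" = 6,
                           "d2.4xlarge" = 12, "d2.8xlarge" = 24)
    get_num_disks <- function(instance_type) {
      if (instance_type %in% names(disks_by_instance)) {
        disks_by_instance[[instance_type]]
      } else {
        # Unknown types fall back to a conservative default.
        warning(paste("Don't know number of disks on instance type",
                      instance_type, "- assuming 1"))
        1
      }
    }
    get_num_disks("d2.4xlarge")   # 12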
spark git commit: [SPARK-7226] [SPARKR] Support math functions in R DataFrame
Repository: spark Updated Branches: refs/heads/master 9b6cf285d - 50da9e891 [SPARK-7226] [SPARKR] Support math functions in R DataFrame Author: qhuang qian.hu...@intel.com Closes #6170 from hqzizania/master and squashes the following commits: f20c39f [qhuang] add tests units and fixes 2a7d121 [qhuang] use a function name more familiar to R users 07aa72e [qhuang] Support math functions in R DataFrame Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50da9e89 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50da9e89 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50da9e89 Branch: refs/heads/master Commit: 50da9e89161faa0ecdc1feb3ffee6c822a742034 Parents: 9b6cf28 Author: qhuang qian.hu...@intel.com Authored: Fri May 15 14:06:16 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri May 15 14:06:16 2015 -0700 -- R/pkg/NAMESPACE | 23 ++ R/pkg/R/column.R | 36 --- R/pkg/R/generics.R | 20 +++ R/pkg/inst/tests/test_sparkSQL.R | 24 +++ 4 files changed, 100 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/50da9e89/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ba29614..64ffdcf 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -59,33 +59,56 @@ exportMethods(arrange, exportClasses(Column) exportMethods(abs, + acos, alias, approxCountDistinct, asc, + asin, + atan, + atan2, avg, cast, + cbrt, + ceiling, contains, + cos, + cosh, countDistinct, desc, endsWith, + exp, + expm1, + floor, getField, getItem, + hypot, isNotNull, isNull, last, like, + log, + log10, + log1p, lower, max, mean, min, n, n_distinct, + rint, rlike, + sign, + sin, + sinh, sqrt, startsWith, substr, sum, sumDistinct, + tan, + tanh, + toDegrees, + toRadians, upper) exportClasses(GroupedData) http://git-wip-us.apache.org/repos/asf/spark/blob/50da9e89/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 9a68445..80e92d3 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -55,12 +55,17 @@ operators - list( + = plus, - = minus, * = multiply, / = divide, %% = mod, == = equalTo, = gt, = lt, != = notEqual, = = leq, = = geq, # we can not override `` and `||`, so use `` and `|` instead - = and, | = or #, ! = unary_$bang + = and, | = or, #, ! = unary_$bang + ^ = pow ) column_functions1 - c(asc, desc, isNull, isNotNull) column_functions2 - c(like, rlike, startsWith, endsWith, getField, getItem, contains) functions - c(min, max, sum, avg, mean, count, abs, sqrt, - first, last, lower, upper, sumDistinct) + first, last, lower, upper, sumDistinct, + acos, asin, atan, cbrt, ceiling, cos, cosh, exp, + expm1, floor, log, log10, log1p, rint, sign, + sin, sinh, tan, tanh, toDegrees, toRadians) +binary_mathfunctions- c(atan2, hypot) createOperator - function(op) { setMethod(op, @@ -76,7 +81,11 @@ createOperator - function(op) { if (class(e2) == Column) { e2 - e2@jc } -callJMethod(e1@jc, operators[[op]], e2) +if (op == ^) { + jc - callJStatic(org.apache.spark.sql.functions, operators[[op]], e1@jc, e2) +} else { + callJMethod(e1@jc, operators[[op]], e2) +} } column(jc) }) @@ -106,11 +115,29 @@ createStaticFunction - function(name) { setMethod(name, signature(x = Column), function(x) { + if (name == ceiling) { + name - ceil + } + if (name == sign) { + name - signum + } jc - callJStatic(org.apache.spark.sql.functions, name, x@jc
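The patch wires Spark SQL's math functions into SparkR's Column class: unary functions go through callJStatic (with ceiling and sign renamed to Spark's ceil and signum), ^ is translated to pow, and atan2/hypot arrive as binary functions. A short usage sketch, assuming a SparkR session and a DataFrame df with numeric columns x and y:

    df2 <- select(df, sin(df$x), toDegrees(df$x), atan2(df$y, df$x))
    df3 <- withColumn(df, "xCubed", df$x ^ 3)   # ^ maps to Spark's pow()
    head(collect(df2))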
spark git commit: [SPARK-7226] [SPARKR] Support math functions in R DataFrame
Repository: spark Updated Branches: refs/heads/branch-1.4 a5f7b3b9c - 9ef6d743a [SPARK-7226] [SPARKR] Support math functions in R DataFrame Author: qhuang qian.hu...@intel.com Closes #6170 from hqzizania/master and squashes the following commits: f20c39f [qhuang] add tests units and fixes 2a7d121 [qhuang] use a function name more familiar to R users 07aa72e [qhuang] Support math functions in R DataFrame (cherry picked from commit 50da9e89161faa0ecdc1feb3ffee6c822a742034) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9ef6d743 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9ef6d743 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9ef6d743 Branch: refs/heads/branch-1.4 Commit: 9ef6d743a65cb3f962e4f2e0716f55dbe7efb084 Parents: a5f7b3b Author: qhuang qian.hu...@intel.com Authored: Fri May 15 14:06:16 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri May 15 14:06:39 2015 -0700 -- R/pkg/NAMESPACE | 23 ++ R/pkg/R/column.R | 36 --- R/pkg/R/generics.R | 20 +++ R/pkg/inst/tests/test_sparkSQL.R | 24 +++ 4 files changed, 100 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9ef6d743/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ba29614..64ffdcf 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -59,33 +59,56 @@ exportMethods(arrange, exportClasses(Column) exportMethods(abs, + acos, alias, approxCountDistinct, asc, + asin, + atan, + atan2, avg, cast, + cbrt, + ceiling, contains, + cos, + cosh, countDistinct, desc, endsWith, + exp, + expm1, + floor, getField, getItem, + hypot, isNotNull, isNull, last, like, + log, + log10, + log1p, lower, max, mean, min, n, n_distinct, + rint, rlike, + sign, + sin, + sinh, sqrt, startsWith, substr, sum, sumDistinct, + tan, + tanh, + toDegrees, + toRadians, upper) exportClasses(GroupedData) http://git-wip-us.apache.org/repos/asf/spark/blob/9ef6d743/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 9a68445..80e92d3 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -55,12 +55,17 @@ operators - list( + = plus, - = minus, * = multiply, / = divide, %% = mod, == = equalTo, = gt, = lt, != = notEqual, = = leq, = = geq, # we can not override `` and `||`, so use `` and `|` instead - = and, | = or #, ! = unary_$bang + = and, | = or, #, ! = unary_$bang + ^ = pow ) column_functions1 - c(asc, desc, isNull, isNotNull) column_functions2 - c(like, rlike, startsWith, endsWith, getField, getItem, contains) functions - c(min, max, sum, avg, mean, count, abs, sqrt, - first, last, lower, upper, sumDistinct) + first, last, lower, upper, sumDistinct, + acos, asin, atan, cbrt, ceiling, cos, cosh, exp, + expm1, floor, log, log10, log1p, rint, sign, + sin, sinh, tan, tanh, toDegrees, toRadians) +binary_mathfunctions- c(atan2, hypot) createOperator - function(op) { setMethod(op, @@ -76,7 +81,11 @@ createOperator - function(op) { if (class(e2) == Column) { e2 - e2@jc } -callJMethod(e1@jc, operators[[op]], e2) +if (op == ^) { + jc - callJStatic(org.apache.spark.sql.functions, operators[[op]], e1@jc, e2) +} else { + callJMethod(e1@jc, operators[[op]], e2) +} } column(jc) }) @@ -106,11 +115,29 @@ createStaticFunction - function(name) { setMethod(name, signature(x = Column), function(x) { + if (name == ceiling) { + name - ceil + } + if (name
spark git commit: [SPARK-6855] [SPARKR] Set R includes to get the right collate order.
Repository: spark Updated Branches: refs/heads/master ef3fb801a - 55f553a97 [SPARK-6855] [SPARKR] Set R includes to get the right collate order. This prevents tools like devtools::document creating invalid collate orders Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #5462 from shivaram/collate-order and squashes the following commits: f3db562 [Shivaram Venkataraman] Set R includes to get the right collate order. This prevents tools like devtools::document creating invalid collate orders Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/55f553a9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/55f553a9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/55f553a9 Branch: refs/heads/master Commit: 55f553a979db925aa0c3559f7e80b99d2bf3feb4 Parents: ef3fb80 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Thu Apr 16 13:06:34 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Thu Apr 16 13:06:34 2015 -0700 -- R/pkg/DESCRIPTION | 6 +++--- R/pkg/R/DataFrame.R | 2 +- R/pkg/R/column.R| 2 +- R/pkg/R/group.R | 3 +++ R/pkg/R/jobj.R | 3 +++ R/pkg/R/pairRDD.R | 2 ++ 6 files changed, 13 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 1842b97..052f68c 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -17,19 +17,19 @@ License: Apache License (== 2.0) Collate: 'generics.R' 'jobj.R' -'SQLTypes.R' 'RDD.R' 'pairRDD.R' +'SQLTypes.R' 'column.R' 'group.R' 'DataFrame.R' 'SQLContext.R' +'backend.R' 'broadcast.R' +'client.R' 'context.R' 'deserialize.R' 'serialize.R' 'sparkR.R' -'backend.R' -'client.R' 'utils.R' 'zzz.R' http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index feafd56..044fdb4 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -17,7 +17,7 @@ # DataFrame.R - DataFrame class and methods implemented in S4 OO classes -#' @include jobj.R SQLTypes.R RDD.R pairRDD.R column.R group.R +#' @include generics.R jobj.R SQLTypes.R RDD.R pairRDD.R column.R group.R NULL setOldClass(jobj) http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index e196305..b282001 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -17,7 +17,7 @@ # Column Class -#' @include generics.R jobj.R +#' @include generics.R jobj.R SQLTypes.R NULL setOldClass(jobj) http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/R/group.R -- diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index 09fc0a7..855fbdf 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -17,6 +17,9 @@ # group.R - GroupedData class and methods implemented in S4 OO classes +#' @include generics.R jobj.R SQLTypes.R column.R +NULL + setOldClass(jobj) #' @title S4 class that represents a GroupedData http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/R/jobj.R -- diff --git a/R/pkg/R/jobj.R b/R/pkg/R/jobj.R index 4180f14..a8a2523 100644 --- a/R/pkg/R/jobj.R +++ b/R/pkg/R/jobj.R @@ -18,6 +18,9 @@ # References to objects that exist on the JVM backend # are maintained using the jobj. 
+#' @include generics.R +NULL + # Maintain a reference count of Java object references # This allows us to GC the java object when it is safe .validJobjs - new.env(parent = emptyenv()) http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/R/pairRDD.R -- diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R index 739d399..5d64822 100644 --- a/R/pkg/R/pairRDD.R +++ b/R/pkg/R/pairRDD.R @@ -16,6 +16,8 @@ # # Operations supported on RDDs contains pairs (i.e key, value) +#' @include generics.R jobj.R RDD.R +NULL Actions and Transformations - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
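The fix works because roxygen builds the DESCRIPTION Collate field from @include directives, so declaring each file's dependencies explicitly keeps devtools::document from emitting an invalid order. The pattern, sketched for a hypothetical file R/bar.R that needs classes from R/foo.R:

    #' @include foo.R
    NULL

    # Definitions below may rely on classes and generics from foo.R,
    # since foo.R is now guaranteed to be collated (and sourced) first.
    setClass("Bar", contains = "Foo")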
spark git commit: [SPARK-6807] [SparkR] Merge recent SparkR-pkg changes
Repository: spark Updated Branches: refs/heads/master a83571acc - 59e206deb [SPARK-6807] [SparkR] Merge recent SparkR-pkg changes This PR pulls in recent changes in SparkR-pkg, including cartesian, intersection, sampleByKey, subtract, subtractByKey, except, and some API for StructType and StructField. Author: cafreeman cfree...@alteryx.com Author: Davies Liu dav...@databricks.com Author: Zongheng Yang zonghen...@gmail.com Author: Shivaram Venkataraman shivaram.venkatara...@gmail.com Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Author: Sun Rui rui@intel.com Closes #5436 from davies/R3 and squashes the following commits: c2b09be [Davies Liu] SQLTypes - schema a5a02f2 [Davies Liu] Merge branch 'master' of github.com:apache/spark into R3 168b7fe [Davies Liu] sort generics b1fe460 [Davies Liu] fix conflict in README.md e74c04e [Davies Liu] fix schema.R 4f5ac09 [Davies Liu] Merge branch 'master' of github.com:apache/spark into R5 41f8184 [Davies Liu] rm man ae78312 [Davies Liu] Merge pull request #237 from sun-rui/SPARKR-154_3 1bdcb63 [Zongheng Yang] Updates to README.md. 5a553e7 [cafreeman] Use object attribute instead of argument 71372d9 [cafreeman] Update docs and examples 8526d2e71 [cafreeman] Remove `tojson` functions 6ef5f2d [cafreeman] Fix spacing 7741d66 [cafreeman] Rename the SQL DataType function 141efd8 [Shivaram Venkataraman] Merge pull request #245 from hqzizania/upstream 9387402 [Davies Liu] fix style 40199eb [Shivaram Venkataraman] Move except into sorted position 07d0dbc [Sun Rui] [SPARKR-244] Fix test failure after integration of subtract() and subtractByKey() for RDD. 7e8caa3 [Shivaram Venkataraman] Merge pull request #246 from hlin09/fixCombineByKey ed66c81 [cafreeman] Update `subtract` to work with `generics.R` f3ba785 [cafreeman] Fixed duplicate export 275deb4 [cafreeman] Update `NAMESPACE` and tests 1a3b63d [cafreeman] new version of `CreateDF` 836c4bf [cafreeman] Update `createDataFrame` and `toDF` be5d5c1 [cafreeman] refactor schema functions 40338a4 [Zongheng Yang] Merge pull request #244 from sun-rui/SPARKR-154_5 20b97a6 [Zongheng Yang] Merge pull request #234 from hqzizania/assist ba54e34 [Shivaram Venkataraman] Merge pull request #238 from sun-rui/SPARKR-154_4 c9497a3 [Shivaram Venkataraman] Merge pull request #208 from lythesia/master b317aa7 [Zongheng Yang] Merge pull request #243 from hqzizania/master 136a07e [Zongheng Yang] Merge pull request #242 from hqzizania/stats cd66603 [cafreeman] new line at EOF 8b76e81 [Shivaram Venkataraman] Merge pull request #233 from redbaron/fail-early-on-missing-dep 7dd81b7 [cafreeman] Documentation 0e2a94f [cafreeman] Define functions for schema and fields Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/59e206de Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/59e206de Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/59e206de Branch: refs/heads/master Commit: 59e206deb7346148412bbf5ba4ab626718fadf18 Parents: a83571a Author: cafreeman cfree...@alteryx.com Authored: Fri Apr 17 13:42:19 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Apr 17 13:42:19 2015 -0700 -- R/pkg/DESCRIPTION | 2 +- R/pkg/NAMESPACE | 20 +- R/pkg/R/DataFrame.R | 18 +- R/pkg/R/RDD.R | 205 --- R/pkg/R/SQLContext.R| 44 +--- R/pkg/R/SQLTypes.R | 64 -- R/pkg/R/column.R| 2 +- R/pkg/R/generics.R | 46 - R/pkg/R/group.R | 2 +- R/pkg/R/pairRDD.R | 192 + R/pkg/R/schema.R| 162 +++ R/pkg/R/serialize.R | 9 +- R/pkg/R/utils.R | 80 
R/pkg/inst/tests/test_rdd.R | 193 ++--- R/pkg/inst/tests/test_shuffle.R | 12 ++ R/pkg/inst/tests/test_sparkSQL.R| 35 ++-- R/pkg/inst/worker/worker.R | 59 +- .../scala/org/apache/spark/api/r/RRDD.scala | 131 ++-- .../scala/org/apache/spark/api/r/SerDe.scala| 14 +- .../org/apache/spark/sql/api/r/SQLUtils.scala | 32 ++- 20 files changed, 971 insertions(+), 351 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/59e206de/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 052f68c..1c1779a 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -19,7 +19,7 @@ Collate: 'jobj.R
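Among the merged pieces are the RDD set operations and the schema builders that replace the old tojson helpers. A brief sketch of both, assuming a SparkR session with contexts sc and sqlCtx, and a hypothetical RDD of rows rowRDD:

    rdd1 <- parallelize(sc, list(1, 2, 3, 4))
    rdd2 <- parallelize(sc, list(3, 4, 5))
    collect(intersection(rdd1, rdd2))   # list(3, 4), order not guaranteed
    collect(subtract(rdd1, rdd2))       # list(1, 2), order not guaranteed

    # The new StructType/StructField API:
    s <- structType(structField("name", "string"), structField("age", "integer"))
    df <- createDataFrame(sqlCtx, rowRDD, s)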
spark git commit: [SPARK-6850] [SparkR] use one partition when we need to compare the whole result
Repository: spark
Updated Branches:
  refs/heads/master 4740d6a15 -> 68ecdb7f9

[SPARK-6850] [SparkR] use one partition when we need to compare the whole result

Author: Davies Liu dav...@databricks.com

Closes #5460 from davies/r_test and squashes the following commits:

0a593ce [Davies Liu] use one partition when we need to compare the whole result

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/68ecdb7f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/68ecdb7f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/68ecdb7f

Branch: refs/heads/master
Commit: 68ecdb7f99ae30f7c04c33a47ab7f59a3836f2a4
Parents: 4740d6a
Author: Davies Liu dav...@databricks.com
Authored: Fri Apr 10 15:35:45 2015 -0700
Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu
Committed: Fri Apr 10 15:35:45 2015 -0700

--
 R/pkg/inst/tests/test_binaryFile.R | 4 ++--
 R/pkg/inst/tests/test_textFile.R   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/68ecdb7f/R/pkg/inst/tests/test_binaryFile.R
--
diff --git a/R/pkg/inst/tests/test_binaryFile.R b/R/pkg/inst/tests/test_binaryFile.R
index 4bb5f58..ca4218f 100644
--- a/R/pkg/inst/tests/test_binaryFile.R
+++ b/R/pkg/inst/tests/test_binaryFile.R
@@ -27,7 +27,7 @@ test_that("saveAsObjectFile()/objectFile() following textFile() works", {
   fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   writeLines(mockFile, fileName1)

-  rdd <- textFile(sc, fileName1)
+  rdd <- textFile(sc, fileName1, 1)
   saveAsObjectFile(rdd, fileName2)
   rdd <- objectFile(sc, fileName2)
   expect_equal(collect(rdd), as.list(mockFile))
@@ -40,7 +40,7 @@ test_that("saveAsObjectFile()/objectFile() works on a parallelized list", {
   fileName <- tempfile(pattern = "spark-test", fileext = ".tmp")

   l <- list(1, 2, 3)
-  rdd <- parallelize(sc, l)
+  rdd <- parallelize(sc, l, 1)
   saveAsObjectFile(rdd, fileName)
   rdd <- objectFile(sc, fileName)
   expect_equal(collect(rdd), l)

http://git-wip-us.apache.org/repos/asf/spark/blob/68ecdb7f/R/pkg/inst/tests/test_textFile.R
--
diff --git a/R/pkg/inst/tests/test_textFile.R b/R/pkg/inst/tests/test_textFile.R
index 7bb3e80..6b87b4b 100644
--- a/R/pkg/inst/tests/test_textFile.R
+++ b/R/pkg/inst/tests/test_textFile.R
@@ -81,7 +81,7 @@ test_that("textFile() followed by a saveAsTextFile() returns the same content",
   fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   writeLines(mockFile, fileName1)

-  rdd <- textFile(sc, fileName1)
+  rdd <- textFile(sc, fileName1, 1L)
   saveAsTextFile(rdd, fileName2)
   rdd <- textFile(sc, fileName2)
   expect_equal(collect(rdd), as.list(mockFile))
@@ -93,7 +93,7 @@ test_that("saveAsTextFile() on a parallelized list works as expected", {
   fileName <- tempfile(pattern = "spark-test", fileext = ".tmp")
   l <- list(1, 2, 3)
-  rdd <- parallelize(sc, l)
+  rdd <- parallelize(sc, l, 1L)
   saveAsTextFile(rdd, fileName)
   rdd <- textFile(sc, fileName)
   expect_equal(collect(rdd), lapply(l, function(x) {toString(x)}))

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
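The change matters because collect() on a multi-partition RDD that has been written out and read back need not preserve the original element order; pinning a single partition makes the round trip deterministic, so the tests can compare against the whole input. A sketch, assuming a SparkR context sc:

    rdd <- parallelize(sc, list(1, 2, 3), 1L)         # one partition
    fileName <- tempfile(pattern = "spark-test", fileext = ".tmp")
    saveAsTextFile(rdd, fileName)
    collect(textFile(sc, fileName))                   # list("1", "2", "3"), in order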
[5/7] spark git commit: [SPARK-5654] Integrate SparkR
(filterRDD(rdd, function (x) { x 3 }))) # c(1, 2) +#'} +#' @rdname filterRDD +#' @aliases filterRDD,RDD,function-method +setMethod(filterRDD, + signature(x = RDD, f = function), + function(x, f) { +filter.func - function(part) { + Filter(f, part) +} +lapplyPartition(x, filter.func) + }) + +#' @rdname filterRDD +#' @aliases Filter +setMethod(Filter, + signature(f = function, x = RDD), + function(f, x) { +filterRDD(x, f) + }) + +#' Reduce across elements of an RDD. +#' +#' This function reduces the elements of this RDD using the +#' specified commutative and associative binary operator. +#' +#' @param x The RDD to reduce +#' @param func Commutative and associative function to apply on elements +#' of the RDD. +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' rdd - parallelize(sc, 1:10) +#' reduce(rdd, +) # 55 +#'} +#' @rdname reduce +#' @aliases reduce,RDD,ANY-method +setMethod(reduce, + signature(x = RDD, func = ANY), + function(x, func) { + +reducePartition - function(part) { + Reduce(func, part) +} + +partitionList - collect(lapplyPartition(x, reducePartition), + flatten = FALSE) +Reduce(func, partitionList) + }) + +#' Get the maximum element of an RDD. +#' +#' @param x The RDD to get the maximum element from +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' rdd - parallelize(sc, 1:10) +#' maximum(rdd) # 10 +#'} +#' @rdname maximum +#' @aliases maximum,RDD +setMethod(maximum, + signature(x = RDD), + function(x) { +reduce(x, max) + }) + +#' Get the minimum element of an RDD. +#' +#' @param x The RDD to get the minimum element from +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' rdd - parallelize(sc, 1:10) +#' minimum(rdd) # 1 +#'} +#' @rdname minimum +#' @aliases minimum,RDD +setMethod(minimum, + signature(x = RDD), + function(x) { +reduce(x, min) + }) + +#' Add up the elements in an RDD. +#' +#' @param x The RDD to add up the elements in +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' rdd - parallelize(sc, 1:10) +#' sumRDD(rdd) # 55 +#'} +#' @rdname sumRDD +#' @aliases sumRDD,RDD +setMethod(sumRDD, + signature(x = RDD), + function(x) { +reduce(x, +) + }) + +#' Applies a function to all elements in an RDD, and force evaluation. +#' +#' @param x The RDD to apply the function +#' @param func The function to be applied. +#' @return invisible NULL. +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' rdd - parallelize(sc, 1:10) +#' foreach(rdd, function(x) { save(x, file=...) }) +#'} +#' @rdname foreach +#' @aliases foreach,RDD,function-method +setMethod(foreach, + signature(x = RDD, func = function), + function(x, func) { +partition.func - function(x) { + lapply(x, func) + NULL +} +invisible(collect(mapPartitions(x, partition.func))) + }) + +#' Applies a function to each partition in an RDD, and force evaluation. +#' +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' rdd - parallelize(sc, 1:10) +#' foreachPartition(rdd, function(part) { save(part, file=...); NULL }) +#'} +#' @rdname foreach +#' @aliases foreachPartition,RDD,function-method +setMethod(foreachPartition, + signature(x = RDD, func = function), + function(x, func) { +invisible(collect(mapPartitions(x, func))) + }) + +#' Take elements from an RDD. +#' +#' This function takes the first NUM elements in the RDD and +#' returns them in a list. 
+#'
+#' @param x The RDD to take elements from
+#' @param num Number of elements to take
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' rdd <- parallelize(sc, 1:10)
+#' take(rdd, 2L) # list(1, 2)
+#'}
+#' @rdname take
+#' @aliases take,RDD,numeric-method
+setMethod("take",
+          signature(x = "RDD", num = "numeric"),
+          function(x, num) {
+            resList <- list()
+            index <- -1
+            jrdd <- getJRDD(x)
+            numPartitions <- numPartitions(x)
+
+            # TODO(shivaram): Collect more than one partition based on size
+            # estimates similar to the scala version of `take`.
+            while (TRUE) {
+              index <- index + 1
+
+              if (length(resList) >= num || index >= numPartitions)
+                break
+
+              # a JList of byte arrays
+              partitionArr <- callJMethod(jrdd, "collectPartitions", as.list(as.integer(index)))
+              partition <- partitionArr[[1]]
+
+              size <- num - length(resList)
+              # elems is capped to have at most `size` elements
+              elems <- convertJListToRList(partition,
+                                           flatten = TRUE
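As the implementation shows, take() pulls partitions over the JVM bridge one at a time and stops as soon as num elements are gathered, instead of collecting the whole RDD. A usage sketch, assuming a SparkR context sc:

    rdd <- parallelize(sc, 1:1000, 10L)
    take(rdd, 5L)   # list(1, 2, 3, 4, 5); the remaining partitions are never fetched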
[7/7] spark git commit: [SPARK-5654] Integrate SparkR
[SPARK-5654] Integrate SparkR This pull requests integrates SparkR, an R frontend for Spark. The SparkR package contains both RDD and DataFrame APIs in R and is integrated with Spark's submission scripts to work on different cluster managers. Some integration points that would be great to get feedback on: 1. Build procedure: SparkR requires R to be installed on the machine to be built. Right now we have a new Maven profile `-PsparkR` that can be used to enable SparkR builds 2. YARN cluster mode: The R package that is built needs to be present on the driver and all the worker nodes during execution. The R package location is currently set using SPARK_HOME, but this might not work on YARN cluster mode. The SparkR package represents the work of many contributors and attached below is a list of people along with areas they worked on edwardt (edwart) - Documentation improvements Felix Cheung (felixcheung) - Documentation improvements Hossein Falaki (falaki) - Documentation improvements Chris Freeman (cafreeman) - DataFrame API, Programming Guide Todd Gao (7c00) - R worker Internals Ryan Hafen (hafen) - SparkR Internals Qian Huang (hqzizania) - RDD API Hao Lin (hlin09) - RDD API, Closure cleaner Evert Lammerts (evertlammerts) - DataFrame API Davies Liu (davies) - DataFrame API, R worker internals, Merging with Spark Yi Lu (lythesia) - RDD API, Worker internals Matt Massie (massie) - Jenkins build Harihar Nahak (hnahak87) - SparkR examples Oscar Olmedo (oscaroboto) - Spark configuration Antonio Piccolboni (piccolbo) - SparkR examples, Namespace bug fixes Dan Putler (dputler) - Dataframe API, SparkR Install Guide Ashutosh Raina (ashutoshraina) - Build improvements Josh Rosen (joshrosen) - Travis CI build Sun Rui (sun-rui)- RDD API, JVM Backend, Shuffle improvements Shivaram Venkataraman (shivaram) - RDD API, JVM Backend, Worker Internals Zongheng Yang (concretevitamin) - RDD API, Pipelined RDDs, Examples and EC2 guide Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Author: Shivaram Venkataraman shivaram.venkatara...@gmail.com Author: Zongheng Yang zonghen...@gmail.com Author: cafreeman cfree...@alteryx.com Author: Shivaram Venkataraman shiva...@eecs.berkeley.edu Author: Davies Liu dav...@databricks.com Author: Davies Liu davies@gmail.com Author: hlin09 hlin0...@gmail.com Author: Sun Rui rui@intel.com Author: lythesia iranaik...@gmail.com Author: oscaroboto osca...@gmail.com Author: Antonio Piccolboni anto...@piccolboni.info Author: root edward Author: edwardt edwardt.t...@gmail.com Author: hqzizania qian.hu...@intel.com Author: dputler dan.put...@gmail.com Author: Todd Gao todd.gao.2...@gmail.com Author: Chris Freeman cfree...@alteryx.com Author: Felix Cheung fcheung@AVVOMAC-119.local Author: Hossein hoss...@databricks.com Author: Evert Lammerts ev...@apache.org Author: Felix Cheung fche...@avvomac-119.t-mobile.com Author: felixcheung felixcheun...@hotmail.com Author: Ryan Hafen rha...@gmail.com Author: Ashutosh Raina ashutoshra...@users.noreply.github.com Author: Oscar Olmedo osca...@gmail.com Author: Josh Rosen rosenvi...@gmail.com Author: Yi Lu iranaik...@gmail.com Author: Harihar Nahak hnaha...@users.noreply.github.com Closes #5096 from shivaram/R and squashes the following commits: da64742 [Davies Liu] fix Date serialization 59266d1 [Davies Liu] check exclusive of primary-py-file and primary-r-file 55808e4 [Davies Liu] fix tests 5581c75 [Davies Liu] update author of SparkR f731b48 [Shivaram Venkataraman] Only run SparkR tests if R is installed 64eda24 [Shivaram Venkataraman] Merge 
branch 'R' of https://github.com/amplab-extras/spark into R d7c3f22 [Shivaram Venkataraman] Address code review comments Changes include 1. Adding SparkR docs to API docs generated 2. Style fixes in SparkR scala files 3. Clean up of shell scripts and explanation of install-dev.sh 377151f [Shivaram Venkataraman] Merge remote-tracking branch 'apache/master' into R eb5da53 [Shivaram Venkataraman] Merge pull request #3 from davies/R2 a18ff5c [Davies Liu] Update sparkR.R 5133f3a [Shivaram Venkataraman] Merge pull request #7 from hqzizania/R3 940b631 [hqzizania] [SPARKR-92] Phase 2: implement sum(rdd) 0e788c0 [Shivaram Venkataraman] Merge pull request #5 from hlin09/doc-fix 3487461 [hlin09] Add tests log in .gitignore. 1d1802e [Shivaram Venkataraman] Merge pull request #4 from felixcheung/r-require 11981b7 [felixcheung] Update R to fail early if SparkR package is missing c300e08 [Davies Liu] remove duplicated file b045701 [Davies Liu] Merge branch 'remote_r' into R 19c9368 [Davies Liu] Merge branch 'sparkr-sql' of github.com:amplab-extras/SparkR-pkg into remote_r f8fa8af [Davies Liu] mute logging when start/stop context e7104b6 [Davies Liu] remove ::: in SparkR a1777eb [Davies Liu] move rules into R/.gitignore e88b649 [Davies Liu] Merge branch 'R' of github.com:amplab-extras/spark into R 6e20e71 [Davies Liu] address comments b433817 [Davies Liu] Merge branch 'master' of github.com:apache/spark into R
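For readers who want to try the merged package, a minimal end-to-end session, assuming Spark was built with the SparkR profile and SPARK_HOME points at the build:

    library(SparkR, lib.loc = file.path(Sys.getenv("SPARK_HOME"), "R", "lib"))
    sc <- sparkR.init(master = "local[2]", appName = "sparkr-demo")
    rdd <- parallelize(sc, 1:100, 4L)
    doubled <- map(rdd, function(x) { x * 2 })
    reduce(doubled, "+")   # 10100
    sparkR.stop()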
[6/7] spark git commit: [SPARK-5654] Integrate SparkR
http://git-wip-us.apache.org/repos/asf/spark/blob/2fe0a1aa/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE new file mode 100644 index 000..a354cdc --- /dev/null +++ b/R/pkg/NAMESPACE @@ -0,0 +1,182 @@ +#exportPattern(^[[:alpha:]]+) +exportClasses(RDD) +exportClasses(Broadcast) +exportMethods( + aggregateByKey, + aggregateRDD, + cache, + checkpoint, + coalesce, + cogroup, + collect, + collectAsMap, + collectPartition, + combineByKey, + count, + countByKey, + countByValue, + distinct, + Filter, + filterRDD, + first, + flatMap, + flatMapValues, + fold, + foldByKey, + foreach, + foreachPartition, + fullOuterJoin, + glom, + groupByKey, + join, + keyBy, + keys, + length, + lapply, + lapplyPartition, + lapplyPartitionsWithIndex, + leftOuterJoin, + lookup, + map, + mapPartitions, + mapPartitionsWithIndex, + mapValues, + maximum, + minimum, + numPartitions, + partitionBy, + persist, + pipeRDD, + reduce, + reduceByKey, + reduceByKeyLocally, + repartition, + rightOuterJoin, + sampleRDD, + saveAsTextFile, + saveAsObjectFile, + sortBy, + sortByKey, + sumRDD, + take, + takeOrdered, + takeSample, + top, + unionRDD, + unpersist, + value, + values, + zipRDD, + zipWithIndex, + zipWithUniqueId + ) + +# S3 methods exported +export( + textFile, + objectFile, + parallelize, + hashCode, + includePackage, + broadcast, + setBroadcastValue, + setCheckpointDir + ) +export(sparkR.init) +export(sparkR.stop) +export(print.jobj) +useDynLib(SparkR, stringHashCode) +importFrom(methods, setGeneric, setMethod, setOldClass) + +# SparkRSQL + +exportClasses(DataFrame) + +exportMethods(columns, + distinct, + dtypes, + explain, + filter, + groupBy, + head, + insertInto, + intersect, + isLocal, + limit, + orderBy, + names, + printSchema, + registerTempTable, + repartition, + sampleDF, + saveAsParquetFile, + saveAsTable, + saveDF, + schema, + select, + selectExpr, + show, + showDF, + sortDF, + subtract, + toJSON, + toRDD, + unionAll, + where, + withColumn, + withColumnRenamed) + +exportClasses(Column) + +exportMethods(abs, + alias, + approxCountDistinct, + asc, + avg, + cast, + contains, + countDistinct, + desc, + endsWith, + getField, + getItem, + isNotNull, + isNull, + last, + like, + lower, + max, + mean, + min, + rlike, + sqrt, + startsWith, + substr, + sum, + sumDistinct, + upper) + +exportClasses(GroupedData) +exportMethods(agg) + +export(sparkRSQL.init, + sparkRHive.init) + +export(cacheTable, + clearCache, + createDataFrame, + createExternalTable, + dropTempTable, + jsonFile, + jsonRDD, + loadDF, + parquetFile, + sql, + table, + tableNames, + tables, + toDF, + uncacheTable) + +export(print.structType, + print.structField) http://git-wip-us.apache.org/repos/asf/spark/blob/2fe0a1aa/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R new file mode 100644 index 000..feafd56 --- /dev/null +++ b/R/pkg/R/DataFrame.R @@ -0,0 +1,1270 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional
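The namespace splits the exports into three layers: RDD methods, DataFrame/Column methods, and SQLContext helpers. A short sketch of the DataFrame surface, assuming sqlContext and a hypothetical JSON file of people records:

    df <- jsonFile(sqlContext, "people.json")
    printSchema(df)
    adults <- filter(df, df$age > 21)
    head(select(adults, adults$name, adults$age))
    count(adults)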
[4/7] spark git commit: [SPARK-5654] Integrate SparkR
http://git-wip-us.apache.org/repos/asf/spark/blob/2fe0a1aa/R/pkg/R/context.R -- diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R new file mode 100644 index 000..2fc0bb2 --- /dev/null +++ b/R/pkg/R/context.R @@ -0,0 +1,225 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the License); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# context.R: SparkContext driven functions + +getMinSplits - function(sc, minSplits) { + if (is.null(minSplits)) { +defaultParallelism - callJMethod(sc, defaultParallelism) +minSplits - min(defaultParallelism, 2) + } + as.integer(minSplits) +} + +#' Create an RDD from a text file. +#' +#' This function reads a text file from HDFS, a local file system (available on all +#' nodes), or any Hadoop-supported file system URI, and creates an +#' RDD of strings from it. +#' +#' @param sc SparkContext to use +#' @param path Path of file to read. A vector of multiple paths is allowed. +#' @param minSplits Minimum number of splits to be created. If NULL, the default +#' value is chosen based on available parallelism. +#' @return RDD where each item is of type \code{character} +#' @export +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' lines - textFile(sc, myfile.txt) +#'} +textFile - function(sc, path, minSplits = NULL) { + # Allow the user to have a more flexible definiton of the text file path + path - suppressWarnings(normalizePath(path)) + #' Convert a string vector of paths to a string containing comma separated paths + path - paste(path, collapse = ,) + + jrdd - callJMethod(sc, textFile, path, getMinSplits(sc, minSplits)) + # jrdd is of type JavaRDD[String] + RDD(jrdd, string) +} + +#' Load an RDD saved as a SequenceFile containing serialized objects. +#' +#' The file to be loaded should be one that was previously generated by calling +#' saveAsObjectFile() of the RDD class. +#' +#' @param sc SparkContext to use +#' @param path Path of file to read. A vector of multiple paths is allowed. +#' @param minSplits Minimum number of splits to be created. If NULL, the default +#' value is chosen based on available parallelism. +#' @return RDD containing serialized R objects. +#' @seealso saveAsObjectFile +#' @export +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' rdd - objectFile(sc, myfile) +#'} +objectFile - function(sc, path, minSplits = NULL) { + # Allow the user to have a more flexible definiton of the text file path + path - suppressWarnings(normalizePath(path)) + #' Convert a string vector of paths to a string containing comma separated paths + path - paste(path, collapse = ,) + + jrdd - callJMethod(sc, objectFile, path, getMinSplits(sc, minSplits)) + # Assume the RDD contains serialized R objects. + RDD(jrdd, byte) +} + +#' Create an RDD from a homogeneous list or vector. +#' +#' This function creates an RDD from a local homogeneous list in R. 
The elements
+#' in the list are split into \code{numSlices} slices and distributed to nodes
+#' in the cluster.
+#'
+#' @param sc SparkContext to use
+#' @param coll collection to parallelize
+#' @param numSlices number of partitions to create in the RDD
+#' @return an RDD created from this collection
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' rdd <- parallelize(sc, 1:10, 2)
+#' # The RDD should contain 10 elements
+#' length(rdd)
+#'}
+parallelize <- function(sc, coll, numSlices = 1) {
+  # TODO: bound/safeguard numSlices
+  # TODO: unit tests for if the split works for all primitives
+  # TODO: support matrix, data frame, etc
+  if ((!is.list(coll) && !is.vector(coll)) || is.data.frame(coll)) {
+    if (is.data.frame(coll)) {
+      message(paste("context.R: A data frame is parallelized by columns."))
+    } else {
+      if (is.matrix(coll)) {
+        message(paste("context.R: A matrix is parallelized by elements."))
+      } else {
+        message(paste("context.R: parallelize() currently only supports lists and vectors.",
+                      "Calling as.list() to coerce coll into a list."))
+      }
+    }
+    coll <- as.list(coll)
+  }
+
+  if (numSlices > length(coll))
+    numSlices <- length(coll)
+
+  sliceLen <- ceiling(length(coll) / numSlices)
+  slices <- split(coll, rep(1:(numSlices + 1), each =
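As the code shows, parallelize() coerces matrices and data frames to lists (with a message) and caps numSlices at the collection length before splitting. A sketch, assuming a SparkR context sc:

    rdd <- parallelize(sc, 1:10, numSlices = 20)
    numPartitions(rdd)   # at most 10: numSlices is capped at length(coll)
    lines <- textFile(sc, "hdfs://nn:8020/data/file.txt")   # hypothetical URI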
[1/7] spark git commit: [SPARK-5654] Integrate SparkR
Repository: spark Updated Branches: refs/heads/master 1b2aab8d5 - 2fe0a1aae http://git-wip-us.apache.org/repos/asf/spark/blob/2fe0a1aa/core/src/main/scala/org/apache/spark/api/r/SerDe.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala new file mode 100644 index 000..ccb2a37 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala @@ -0,0 +1,340 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the License); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.api.r + +import java.io.{DataInputStream, DataOutputStream} +import java.sql.{Date, Time} + +import scala.collection.JavaConversions._ + +/** + * Utility functions to serialize, deserialize objects to / from R + */ +private[spark] object SerDe { + + // Type mapping from R to Java + // + // NULL - void + // integer - Int + // character - String + // logical - Boolean + // double, numeric - Double + // raw - Array[Byte] + // Date - Date + // POSIXlt/POSIXct - Time + // + // list[T] - Array[T], where T is one of above mentioned types + // environment - Map[String, T], where T is a native type + // jobj - Object, where jobj is an object created in the backend + + def readObjectType(dis: DataInputStream): Char = { +dis.readByte().toChar + } + + def readObject(dis: DataInputStream): Object = { +val dataType = readObjectType(dis) +readTypedObject(dis, dataType) + } + + def readTypedObject( + dis: DataInputStream, + dataType: Char): Object = { +dataType match { + case 'n' = null + case 'i' = new java.lang.Integer(readInt(dis)) + case 'd' = new java.lang.Double(readDouble(dis)) + case 'b' = new java.lang.Boolean(readBoolean(dis)) + case 'c' = readString(dis) + case 'e' = readMap(dis) + case 'r' = readBytes(dis) + case 'l' = readList(dis) + case 'D' = readDate(dis) + case 't' = readTime(dis) + case 'j' = JVMObjectTracker.getObject(readString(dis)) + case _ = throw new IllegalArgumentException(sInvalid type $dataType) +} + } + + def readBytes(in: DataInputStream): Array[Byte] = { +val len = readInt(in) +val out = new Array[Byte](len) +val bytesRead = in.readFully(out) +out + } + + def readInt(in: DataInputStream): Int = { +in.readInt() + } + + def readDouble(in: DataInputStream): Double = { +in.readDouble() + } + + def readString(in: DataInputStream): String = { +val len = in.readInt() +val asciiBytes = new Array[Byte](len) +in.readFully(asciiBytes) +assert(asciiBytes(len - 1) == 0) +val str = new String(asciiBytes.dropRight(1).map(_.toChar)) +str + } + + def readBoolean(in: DataInputStream): Boolean = { +val intVal = in.readInt() +if (intVal == 0) false else true + } + + def readDate(in: DataInputStream): Date = { +Date.valueOf(readString(in)) + } + + def readTime(in: DataInputStream): Time = { +val t = in.readDouble() +new Time((t * 
1000L).toLong)
+  }
+
+  def readBytesArr(in: DataInputStream): Array[Array[Byte]] = {
+    val len = readInt(in)
+    (0 until len).map(_ => readBytes(in)).toArray
+  }
+
+  def readIntArr(in: DataInputStream): Array[Int] = {
+    val len = readInt(in)
+    (0 until len).map(_ => readInt(in)).toArray
+  }
+
+  def readDoubleArr(in: DataInputStream): Array[Double] = {
+    val len = readInt(in)
+    (0 until len).map(_ => readDouble(in)).toArray
+  }
+
+  def readBooleanArr(in: DataInputStream): Array[Boolean] = {
+    val len = readInt(in)
+    (0 until len).map(_ => readBoolean(in)).toArray
+  }
+
+  def readStringArr(in: DataInputStream): Array[String] = {
+    val len = readInt(in)
+    (0 until len).map(_ => readString(in)).toArray
+  }
+
+  def readList(dis: DataInputStream): Array[_] = {
+    val arrType = readObjectType(dis)
+    arrType match {
+      case 'i' => readIntArr(dis)
+      case 'c' => readStringArr(dis)
+      case 'd' => readDoubleArr(dis)
+      case 'b' => readBooleanArr(dis)
+      case 'j' => readStringArr(dis).map(x => JVMObjectTracker.getObject(x))
+      case 'r' => readBytesArr(dis)
+      case _ => throw new
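On the wire, every value is a one-character type tag followed by a big-endian payload read via DataInputStream; strings additionally carry a trailing NUL byte that readString() asserts and strips. An illustrative R sketch of that framing (the package's real writer lives in R/pkg/R/serialize.R; this only makes the format concrete):

    con <- rawConnection(raw(0), "r+")
    writeChar("i", con, eos = NULL)               # tag: integer
    writeBin(42L, con, endian = "big")            # 4-byte big-endian payload
    writeChar("c", con, eos = NULL)               # tag: character
    s <- "hi"
    writeBin(nchar(s) + 1L, con, endian = "big")  # length includes the NUL
    writeBin(charToRaw(s), con)
    writeBin(as.raw(0), con)                      # trailing NUL terminator
    bytes <- rawConnectionValue(con)
    close(con)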
spark git commit: [SPARK-6246] [EC2] fixed support for more than 100 nodes
Repository: spark
Updated Branches:
  refs/heads/master bcb1ff814 -> 2bc5e0616

[SPARK-6246] [EC2] fixed support for more than 100 nodes

This is a small fix. But it is important for Amazon users because, as the ticket states, spark-ec2 currently can't handle clusters with more than 100 nodes.

Author: alyaxey oleksii.sliusare...@grammarly.com

Closes #6267 from alyaxey/ec2_100_nodes_fix and squashes the following commits:

1e0d747 [alyaxey] [SPARK-6246] fixed support for more than 100 nodes

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2bc5e061
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2bc5e061
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2bc5e061

Branch: refs/heads/master
Commit: 2bc5e0616d878b09daa8e31a7a1fdb7127bca079
Parents: bcb1ff8
Author: alyaxey oleksii.sliusare...@grammarly.com
Authored: Tue May 19 16:45:52 2015 -0700
Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu
Committed: Tue May 19 16:45:52 2015 -0700

--
 ec2/spark_ec2.py | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/2bc5e061/ec2/spark_ec2.py
--
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index be92d5f..c6d5a1f 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -864,7 +864,11 @@ def wait_for_cluster_state(conn, opts, cluster_instances, cluster_state):
         for i in cluster_instances:
             i.update()

-        statuses = conn.get_all_instance_status(instance_ids=[i.id for i in cluster_instances])
+        max_batch = 100
+        statuses = []
+        for j in xrange(0, len(cluster_instances), max_batch):
+            batch = [i.id for i in cluster_instances[j:j + max_batch]]
+            statuses.extend(conn.get_all_instance_status(instance_ids=batch))

         if cluster_state == 'ssh-ready':
             if all(i.state == 'running' for i in cluster_instances) and \

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
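The underlying limit is on the AWS side: the status API caps such queries at 100 instance IDs per request, so the fix chunks the request. The same batching pattern, sketched in R with a hypothetical describe_status() standing in for the capped API call:

    ids <- paste0("i-", sprintf("%05d", 1:250))
    max_batch <- 100
    statuses <- list()
    for (j in seq(1, length(ids), by = max_batch)) {
      batch <- ids[j:min(j + max_batch - 1, length(ids))]
      statuses <- c(statuses, describe_status(batch))   # hypothetical helper
    }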
spark git commit: [MINOR] Add 1.3, 1.3.1 to master branch EC2 scripts
Repository: spark Updated Branches: refs/heads/master ba4f8ca0d - 1a7b9ce80 [MINOR] Add 1.3, 1.3.1 to master branch EC2 scripts cc pwendell P.S: I can't believe this was outdated all along ? Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6215 from shivaram/update-ec2-map and squashes the following commits: ae3937a [Shivaram Venkataraman] Add 1.3, 1.3.1 to master branch EC2 scripts Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1a7b9ce8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1a7b9ce8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1a7b9ce8 Branch: refs/heads/master Commit: 1a7b9ce80bb5649796dda48d6a6d662a2809d0ef Parents: ba4f8ca Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Sun May 17 00:12:20 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sun May 17 00:12:20 2015 -0700 -- ec2/spark_ec2.py | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1a7b9ce8/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index ab4a96f..be92d5f 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -48,7 +48,7 @@ else: from urllib.request import urlopen, Request from urllib.error import HTTPError -SPARK_EC2_VERSION = 1.2.1 +SPARK_EC2_VERSION = 1.3.1 SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__)) VALID_SPARK_VERSIONS = set([ @@ -65,6 +65,8 @@ VALID_SPARK_VERSIONS = set([ 1.1.1, 1.2.0, 1.2.1, +1.3.0, +1.3.1, ]) SPARK_TACHYON_MAP = { @@ -75,6 +77,8 @@ SPARK_TACHYON_MAP = { 1.1.1: 0.5.0, 1.2.0: 0.5.0, 1.2.1: 0.5.0, +1.3.0: 0.5.0, +1.3.1: 0.5.0, } DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [MINOR] Add 1.3, 1.3.1 to master branch EC2 scripts
Repository: spark Updated Branches: refs/heads/branch-1.4 671a6bca5 - 0ed376afa [MINOR] Add 1.3, 1.3.1 to master branch EC2 scripts cc pwendell P.S: I can't believe this was outdated all along ? Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6215 from shivaram/update-ec2-map and squashes the following commits: ae3937a [Shivaram Venkataraman] Add 1.3, 1.3.1 to master branch EC2 scripts (cherry picked from commit 1a7b9ce80bb5649796dda48d6a6d662a2809d0ef) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0ed376af Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0ed376af Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0ed376af Branch: refs/heads/branch-1.4 Commit: 0ed376afad603b7afd86bb8eb312cad6edae2b9c Parents: 671a6bc Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Sun May 17 00:12:20 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sun May 17 00:12:46 2015 -0700 -- ec2/spark_ec2.py | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0ed376af/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index ab4a96f..be92d5f 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -48,7 +48,7 @@ else: from urllib.request import urlopen, Request from urllib.error import HTTPError -SPARK_EC2_VERSION = 1.2.1 +SPARK_EC2_VERSION = 1.3.1 SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__)) VALID_SPARK_VERSIONS = set([ @@ -65,6 +65,8 @@ VALID_SPARK_VERSIONS = set([ 1.1.1, 1.2.0, 1.2.1, +1.3.0, +1.3.1, ]) SPARK_TACHYON_MAP = { @@ -75,6 +77,8 @@ SPARK_TACHYON_MAP = { 1.1.1: 0.5.0, 1.2.0: 0.5.0, 1.2.1: 0.5.0, +1.3.0: 0.5.0, +1.3.1: 0.5.0, } DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8063] [SPARKR] Spark master URL conflict between MASTER env variable and --master command line option.
Repository: spark
Updated Branches:
  refs/heads/master d2a86eb8f -> 708c63bbb

[SPARK-8063] [SPARKR] Spark master URL conflict between MASTER env variable and --master command line option.

Author: Sun Rui rui@intel.com

Closes #6605 from sun-rui/SPARK-8063 and squashes the following commits:

51ca48b [Sun Rui] [SPARK-8063][SPARKR] Spark master URL conflict between MASTER env variable and --master command line option.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/708c63bb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/708c63bb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/708c63bb

Branch: refs/heads/master
Commit: 708c63bbbe9580eb774fe47e23ef61338103afda
Parents: d2a86eb
Author: Sun Rui rui@intel.com
Authored: Wed Jun 3 11:56:35 2015 -0700
Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu
Committed: Wed Jun 3 11:56:35 2015 -0700

--
 R/pkg/inst/profile/shell.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/708c63bb/R/pkg/inst/profile/shell.R
--
diff --git a/R/pkg/inst/profile/shell.R b/R/pkg/inst/profile/shell.R
index ca94f1d..773b6ec 100644
--- a/R/pkg/inst/profile/shell.R
+++ b/R/pkg/inst/profile/shell.R
@@ -24,7 +24,7 @@
   old <- getOption("defaultPackages")
   options(defaultPackages = c(old, "SparkR"))

-  sc <- SparkR::sparkR.init(Sys.getenv("MASTER", unset = ""))
+  sc <- SparkR::sparkR.init()
   assign("sc", sc, envir = .GlobalEnv)
   sqlContext <- SparkR::sparkRSQL.init(sc)
   assign("sqlContext", sqlContext, envir = .GlobalEnv)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
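Previously the shell profile always fed the MASTER environment variable (defaulting to the empty string) into sparkR.init(), which clobbered any --master passed on the command line; calling sparkR.init() with no arguments lets the master chosen by spark-submit take effect. Sketched side by side:

    old_way <- function() SparkR::sparkR.init(Sys.getenv("MASTER", unset = ""))
    new_way <- function() SparkR::sparkR.init()   # defers to spark-submit's --master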
spark git commit: [SPARK-8063] [SPARKR] Spark master URL conflict between MASTER env variable and --master command line option.
Repository: spark Updated Branches: refs/heads/branch-1.4 0a1dad6cd - f67a27d02 [SPARK-8063] [SPARKR] Spark master URL conflict between MASTER env variable and --master command line option. Author: Sun Rui rui@intel.com Closes #6605 from sun-rui/SPARK-8063 and squashes the following commits: 51ca48b [Sun Rui] [SPARK-8063][SPARKR] Spark master URL conflict between MASTER env variable and --master command line option. (cherry picked from commit 708c63bbbe9580eb774fe47e23ef61338103afda) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f67a27d0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f67a27d0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f67a27d0 Branch: refs/heads/branch-1.4 Commit: f67a27d02699af24d5a2ccb843954a643a7ba078 Parents: 0a1dad6 Author: Sun Rui rui@intel.com Authored: Wed Jun 3 11:56:35 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Jun 3 11:57:00 2015 -0700 -- R/pkg/inst/profile/shell.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f67a27d0/R/pkg/inst/profile/shell.R -- diff --git a/R/pkg/inst/profile/shell.R b/R/pkg/inst/profile/shell.R index ca94f1d..773b6ec 100644 --- a/R/pkg/inst/profile/shell.R +++ b/R/pkg/inst/profile/shell.R @@ -24,7 +24,7 @@ old - getOption(defaultPackages) options(defaultPackages = c(old, SparkR)) - sc - SparkR::sparkR.init(Sys.getenv(MASTER, unset = )) + sc - SparkR::sparkR.init() assign(sc, sc, envir=.GlobalEnv) sqlContext - SparkR::sparkRSQL.init(sc) assign(sqlContext, sqlContext, envir=.GlobalEnv) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8084] [SPARKR] Make SparkR scripts fail on error
Repository: spark Updated Branches: refs/heads/branch-1.4 16748694b - c2c129073 [SPARK-8084] [SPARKR] Make SparkR scripts fail on error cc shaneknapp pwendell JoshRosen Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6623 from shivaram/SPARK-8084 and squashes the following commits: 0ec5b26 [Shivaram Venkataraman] Make SparkR scripts fail on error (cherry picked from commit 0576c3c4ff9d9bbff208e915bee1ac0d4956548c) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c2c12907 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c2c12907 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c2c12907 Branch: refs/heads/branch-1.4 Commit: c2c129073f97de5c35532177b0811ff0892429b2 Parents: 1674869 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Wed Jun 3 17:02:16 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Jun 3 17:02:29 2015 -0700 -- R/create-docs.sh | 3 +++ R/install-dev.sh | 2 ++ 2 files changed, 5 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c2c12907/R/create-docs.sh -- diff --git a/R/create-docs.sh b/R/create-docs.sh index 4194172..af47c08 100755 --- a/R/create-docs.sh +++ b/R/create-docs.sh @@ -23,6 +23,9 @@ # After running this script the html docs can be found in # $SPARK_HOME/R/pkg/html +set -o pipefail +set -e + # Figure out where the script is export FWDIR=$(cd `dirname $0`; pwd) pushd $FWDIR http://git-wip-us.apache.org/repos/asf/spark/blob/c2c12907/R/install-dev.sh -- diff --git a/R/install-dev.sh b/R/install-dev.sh index 55ed6f4..b9e2527 100755 --- a/R/install-dev.sh +++ b/R/install-dev.sh @@ -26,6 +26,8 @@ # NOTE(shivaram): Right now we use $SPARK_HOME/R/lib to be the installation directory # to load the SparkR package on the worker nodes. +set -o pipefail +set -e FWDIR=$(cd `dirname $0`; pwd) LIB_DIR=$FWDIR/lib - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8084] [SPARKR] Make SparkR scripts fail on error
Repository: spark Updated Branches: refs/heads/master 51898b515 - 0576c3c4f [SPARK-8084] [SPARKR] Make SparkR scripts fail on error cc shaneknapp pwendell JoshRosen Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6623 from shivaram/SPARK-8084 and squashes the following commits: 0ec5b26 [Shivaram Venkataraman] Make SparkR scripts fail on error Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0576c3c4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0576c3c4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0576c3c4 Branch: refs/heads/master Commit: 0576c3c4ff9d9bbff208e915bee1ac0d4956548c Parents: 51898b5 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Wed Jun 3 17:02:16 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Jun 3 17:02:16 2015 -0700 -- R/create-docs.sh | 3 +++ R/install-dev.sh | 2 ++ 2 files changed, 5 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0576c3c4/R/create-docs.sh -- diff --git a/R/create-docs.sh b/R/create-docs.sh index 4194172..af47c08 100755 --- a/R/create-docs.sh +++ b/R/create-docs.sh @@ -23,6 +23,9 @@ # After running this script the html docs can be found in # $SPARK_HOME/R/pkg/html +set -o pipefail +set -e + # Figure out where the script is export FWDIR=$(cd `dirname $0`; pwd) pushd $FWDIR http://git-wip-us.apache.org/repos/asf/spark/blob/0576c3c4/R/install-dev.sh -- diff --git a/R/install-dev.sh b/R/install-dev.sh index 55ed6f4..b9e2527 100755 --- a/R/install-dev.sh +++ b/R/install-dev.sh @@ -26,6 +26,8 @@ # NOTE(shivaram): Right now we use $SPARK_HOME/R/lib to be the installation directory # to load the SparkR package on the worker nodes. +set -o pipefail +set -e FWDIR=$(cd `dirname $0`; pwd) LIB_DIR=$FWDIR/lib - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-6820] [SPARKR] Convert NAs to null type in SparkR DataFrames
Repository: spark
Updated Branches:
  refs/heads/master 82870d507 -> a5c52c1a3

[SPARK-6820] [SPARKR] Convert NAs to null type in SparkR DataFrames

Author: hqzizania qian.hu...@intel.com

Closes #6190 from hqzizania/R and squashes the following commits:

1641f9e [hqzizania] fixes and add test units
bb3411a [hqzizania] Convert NAs to null type in SparkR DataFrames

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a5c52c1a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a5c52c1a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a5c52c1a

Branch: refs/heads/master
Commit: a5c52c1a3488b69bec19e460d2d1fdb0c9ada58d
Parents: 82870d5
Author: hqzizania qian.hu...@intel.com
Authored: Mon Jun 8 21:40:12 2015 -0700
Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu
Committed: Mon Jun 8 21:40:12 2015 -0700

--
 R/pkg/R/serialize.R              |  8 ++++++
 R/pkg/inst/tests/test_sparkSQL.R | 37 +++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/a5c52c1a/R/pkg/R/serialize.R
--
diff --git a/R/pkg/R/serialize.R b/R/pkg/R/serialize.R
index 2081786..3169d79 100644
--- a/R/pkg/R/serialize.R
+++ b/R/pkg/R/serialize.R
@@ -37,6 +37,14 @@ writeObject <- function(con, object, writeType = TRUE) {
   # passing in vectors as arrays and instead require arrays to be passed
   # as lists.
   type <- class(object)[[1]]  # class of POSIXlt is c("POSIXlt", "POSIXt")
+  # Checking types is needed here, since `is.na` only handles atomic vectors,
+  # lists and pairlists
+  if (type %in% c("integer", "character", "logical", "double", "numeric")) {
+    if (is.na(object)) {
+      object <- NULL
+      type <- NULL
+    }
+  }
   if (writeType) {
     writeType(con, type)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/a5c52c1a/R/pkg/inst/tests/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 30edfc8..8946348 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -101,6 +101,43 @@ test_that("create DataFrame from RDD", {
   expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
 })

+test_that("convert NAs to null type in DataFrames", {
+  rdd <- parallelize(sc, list(list(1L, 2L), list(NA, 4L)))
+  df <- createDataFrame(sqlContext, rdd, list("a", "b"))
+  expect_true(is.na(collect(df)[2, "a"]))
+  expect_equal(collect(df)[2, "b"], 4L)
+
+  l <- data.frame(x = 1L, y = c(1L, NA_integer_, 3L))
+  df <- createDataFrame(sqlContext, l)
+  expect_equal(collect(df)[2, "x"], 1L)
+  expect_true(is.na(collect(df)[2, "y"]))
+
+  rdd <- parallelize(sc, list(list(1, 2), list(NA, 4)))
+  df <- createDataFrame(sqlContext, rdd, list("a", "b"))
+  expect_true(is.na(collect(df)[2, "a"]))
+  expect_equal(collect(df)[2, "b"], 4)
+
+  l <- data.frame(x = 1, y = c(1, NA_real_, 3))
+  df <- createDataFrame(sqlContext, l)
+  expect_equal(collect(df)[2, "x"], 1)
+  expect_true(is.na(collect(df)[2, "y"]))
+
+  l <- list("a", "b", NA, "d")
+  df <- createDataFrame(sqlContext, l)
+  expect_true(is.na(collect(df)[3, "_1"]))
+  expect_equal(collect(df)[4, "_1"], "d")
+
+  l <- list("a", "b", NA_character_, "d")
+  df <- createDataFrame(sqlContext, l)
+  expect_true(is.na(collect(df)[3, "_1"]))
+  expect_equal(collect(df)[4, "_1"], "d")
+
+  l <- list(TRUE, FALSE, NA, TRUE)
+  df <- createDataFrame(sqlContext, l)
+  expect_true(is.na(collect(df)[3, "_1"]))
+  expect_equal(collect(df)[4, "_1"], TRUE)
+})
+
 test_that("toDF", {
   rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) })
   df <- toDF(rdd, list("a", "b"))

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail:
commits-h...@spark.apache.org
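For readers following along, a minimal sketch of the round trip this patch fixes, mirroring the second unit test above (SparkR 1.4-era API; a running local context is assumed):

```r
library(SparkR)
sc <- sparkR.init()
sqlContext <- sparkRSQL.init(sc)

# y carries an NA in its second element
l <- data.frame(x = 1L, y = c(1L, NA_integer_, 3L))
df <- createDataFrame(sqlContext, l)

# The NA is serialized as a SQL null and comes back as NA after collect()
local <- collect(df)
is.na(local[2, "y"])  # TRUE
local[2, "x"]         # 1L
```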
spark git commit: [SPARK-8085] [SPARKR] Support user-specified schema in read.df
Repository: spark Updated Branches: refs/heads/branch-1.4 0ef2e9d35 - 3e3151e75 [SPARK-8085] [SPARKR] Support user-specified schema in read.df cc davies sun-rui Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6620 from shivaram/sparkr-read-schema and squashes the following commits: 16a6726 [Shivaram Venkataraman] Fix loadDF to pass schema Also add a unit test a229877 [Shivaram Venkataraman] Use wrapper function to DataFrameReader ee70ba8 [Shivaram Venkataraman] Support user-specified schema in read.df (cherry picked from commit 12f5eaeee1235850a076ce5716d069bd2f1205a5) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3e3151e7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3e3151e7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3e3151e7 Branch: refs/heads/branch-1.4 Commit: 3e3151e755dd68aa9a75188d6ecb968c7c1dff24 Parents: 0ef2e9d Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Fri Jun 5 10:19:03 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jun 5 10:19:15 2015 -0700 -- R/pkg/R/SQLContext.R | 14 ++ R/pkg/inst/tests/test_sparkSQL.R | 13 + .../scala/org/apache/spark/sql/api/r/SQLUtils.scala | 15 +++ 3 files changed, 38 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3e3151e7/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 88e1a50..22a4b5b 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -452,7 +452,7 @@ dropTempTable - function(sqlContext, tableName) { #' df - read.df(sqlContext, path/to/file.json, source = json) #' } -read.df - function(sqlContext, path = NULL, source = NULL, ...) { +read.df - function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) { options - varargsToEnv(...) if (!is.null(path)) { options[['path']] - path @@ -462,15 +462,21 @@ read.df - function(sqlContext, path = NULL, source = NULL, ...) { source - callJMethod(sqlContext, getConf, spark.sql.sources.default, org.apache.spark.sql.parquet) } - sdf - callJMethod(sqlContext, load, source, options) + if (!is.null(schema)) { +stopifnot(class(schema) == structType) +sdf - callJStatic(org.apache.spark.sql.api.r.SQLUtils, loadDF, sqlContext, source, + schema$jobj, options) + } else { +sdf - callJStatic(org.apache.spark.sql.api.r.SQLUtils, loadDF, sqlContext, source, options) + } dataFrame(sdf) } #' @aliases loadDF #' @export -loadDF - function(sqlContext, path = NULL, source = NULL, ...) { - read.df(sqlContext, path, source, ...) +loadDF - function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) { + read.df(sqlContext, path, source, schema, ...) 
} #' Create an external table http://git-wip-us.apache.org/repos/asf/spark/blob/3e3151e7/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index d2d82e7..30edfc8 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -504,6 +504,19 @@ test_that(read.df() from json file, { df - read.df(sqlContext, jsonPath, json) expect_true(inherits(df, DataFrame)) expect_true(count(df) == 3) + + # Check if we can apply a user defined schema + schema - structType(structField(name, type = string), + structField(age, type = double)) + + df1 - read.df(sqlContext, jsonPath, json, schema) + expect_true(inherits(df1, DataFrame)) + expect_equal(dtypes(df1), list(c(name, string), c(age, double))) + + # Run the same with loadDF + df2 - loadDF(sqlContext, jsonPath, json, schema) + expect_true(inherits(df2, DataFrame)) + expect_equal(dtypes(df2), list(c(name, string), c(age, double))) }) test_that(write.df() as parquet file, { http://git-wip-us.apache.org/repos/asf/spark/blob/3e3151e7/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index 604f312..43b62f0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -139,4 +139,19 @@ private[r] object SQLUtils { case ignore = SaveMode.Ignore } } + + def loadDF( + sqlContext: SQLContext
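A short usage sketch of the new argument, mirroring the unit test above; jsonPath stands in for any JSON file holding name/age records:

```r
# Supply an explicit schema instead of relying on inference
schema <- structType(structField("name", type = "string"),
                     structField("age", type = "double"))

df <- read.df(sqlContext, jsonPath, "json", schema)
dtypes(df)  # list(c("name", "string"), c("age", "double"))

# loadDF stays as an alias and now forwards the schema too
df2 <- loadDF(sqlContext, jsonPath, "json", schema)
```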
spark git commit: [SPARK-8027] [SPARKR] Move man pages creation to install-dev.sh
Repository: spark Updated Branches: refs/heads/branch-1.4 81ff7a901 - 0b71b851d [SPARK-8027] [SPARKR] Move man pages creation to install-dev.sh This also helps us get rid of the sparkr-docs maven profile as docs are now built by just using -Psparkr when the roxygen2 package is available Related to discussion in #6567 cc pwendell srowen -- Let me know if this looks better Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6593 from shivaram/sparkr-pom-cleanup and squashes the following commits: b282241 [Shivaram Venkataraman] Remove sparkr-docs from release script as well 8f100a5 [Shivaram Venkataraman] Move man pages creation to install-dev.sh This also helps us get rid of the sparkr-docs maven profile as docs are now built by just using -Psparkr when the roxygen2 package is available (cherry picked from commit 3dc005282a694e105f40e429b28b0a677743341f) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0b71b851 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0b71b851 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0b71b851 Branch: refs/heads/branch-1.4 Commit: 0b71b851de8a1f97fe764b668337474661ee014e Parents: 81ff7a9 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Thu Jun 4 12:52:16 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Thu Jun 4 12:52:45 2015 -0700 -- R/create-docs.sh | 5 + R/install-dev.sh | 9 - core/pom.xml | 23 --- dev/create-release/create-release.sh | 16 4 files changed, 17 insertions(+), 36 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0b71b851/R/create-docs.sh -- diff --git a/R/create-docs.sh b/R/create-docs.sh index af47c08..6a4687b 100755 --- a/R/create-docs.sh +++ b/R/create-docs.sh @@ -30,10 +30,7 @@ set -e export FWDIR=$(cd `dirname $0`; pwd) pushd $FWDIR -# Generate Rd file -Rscript -e 'library(devtools); devtools::document(pkg=./pkg, roclets=c(rd))' - -# Install the package +# Install the package (this will also generate the Rd files) ./install-dev.sh # Now create HTML files http://git-wip-us.apache.org/repos/asf/spark/blob/0b71b851/R/install-dev.sh -- diff --git a/R/install-dev.sh b/R/install-dev.sh index b9e2527..1edd551 100755 --- a/R/install-dev.sh +++ b/R/install-dev.sh @@ -34,5 +34,12 @@ LIB_DIR=$FWDIR/lib mkdir -p $LIB_DIR -# Install R +pushd $FWDIR + +# Generate Rd files if devtools is installed +Rscript -e ' if(devtools %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg=./pkg, roclets=c(rd)) }' + +# Install SparkR to $LIB_DIR R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/ + +popd http://git-wip-us.apache.org/repos/asf/spark/blob/0b71b851/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index a021842..5c02be8 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -481,29 +481,6 @@ /plugins /build /profile -profile - idsparkr-docs/id - build -plugins - plugin -groupIdorg.codehaus.mojo/groupId -artifactIdexec-maven-plugin/artifactId -executions - execution -idsparkr-pkg-docs/id -phasecompile/phase -goals - goalexec/goal -/goals - /execution -/executions -configuration - executable..${path.separator}R${path.separator}create-docs${script.extension}/executable -/configuration - /plugin -/plugins - /build -/profile /profiles /project http://git-wip-us.apache.org/repos/asf/spark/blob/0b71b851/dev/create-release/create-release.sh -- diff --git a/dev/create-release/create-release.sh 
b/dev/create-release/create-release.sh index 0b14a61..54274a8 100755 --- a/dev/create-release/create-release.sh +++ b/dev/create-release/create-release.sh @@ -228,14 +228,14 @@ if [[ ! $@ =~ --skip-package ]]; then # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds # share the same Zinc server. - make_binary_release hadoop1 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive-thriftserver 3030 - make_binary_release hadoop1-scala2.11 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Dscala-2.11 3031 - make_binary_release cdh4 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive
spark git commit: [SPARK-8027] [SPARKR] Move man pages creation to install-dev.sh
Repository: spark Updated Branches: refs/heads/master cd3176bd8 - 3dc005282 [SPARK-8027] [SPARKR] Move man pages creation to install-dev.sh This also helps us get rid of the sparkr-docs maven profile as docs are now built by just using -Psparkr when the roxygen2 package is available Related to discussion in #6567 cc pwendell srowen -- Let me know if this looks better Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6593 from shivaram/sparkr-pom-cleanup and squashes the following commits: b282241 [Shivaram Venkataraman] Remove sparkr-docs from release script as well 8f100a5 [Shivaram Venkataraman] Move man pages creation to install-dev.sh This also helps us get rid of the sparkr-docs maven profile as docs are now built by just using -Psparkr when the roxygen2 package is available Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3dc00528 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3dc00528 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3dc00528 Branch: refs/heads/master Commit: 3dc005282a694e105f40e429b28b0a677743341f Parents: cd3176b Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Thu Jun 4 12:52:16 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Thu Jun 4 12:52:16 2015 -0700 -- R/create-docs.sh | 5 + R/install-dev.sh | 9 - core/pom.xml | 23 --- dev/create-release/create-release.sh | 16 4 files changed, 17 insertions(+), 36 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3dc00528/R/create-docs.sh -- diff --git a/R/create-docs.sh b/R/create-docs.sh index af47c08..6a4687b 100755 --- a/R/create-docs.sh +++ b/R/create-docs.sh @@ -30,10 +30,7 @@ set -e export FWDIR=$(cd `dirname $0`; pwd) pushd $FWDIR -# Generate Rd file -Rscript -e 'library(devtools); devtools::document(pkg=./pkg, roclets=c(rd))' - -# Install the package +# Install the package (this will also generate the Rd files) ./install-dev.sh # Now create HTML files http://git-wip-us.apache.org/repos/asf/spark/blob/3dc00528/R/install-dev.sh -- diff --git a/R/install-dev.sh b/R/install-dev.sh index b9e2527..1edd551 100755 --- a/R/install-dev.sh +++ b/R/install-dev.sh @@ -34,5 +34,12 @@ LIB_DIR=$FWDIR/lib mkdir -p $LIB_DIR -# Install R +pushd $FWDIR + +# Generate Rd files if devtools is installed +Rscript -e ' if(devtools %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg=./pkg, roclets=c(rd)) }' + +# Install SparkR to $LIB_DIR R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/ + +popd http://git-wip-us.apache.org/repos/asf/spark/blob/3dc00528/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index e35694e..40a64be 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -481,29 +481,6 @@ /plugins /build /profile -profile - idsparkr-docs/id - build -plugins - plugin -groupIdorg.codehaus.mojo/groupId -artifactIdexec-maven-plugin/artifactId -executions - execution -idsparkr-pkg-docs/id -phasecompile/phase -goals - goalexec/goal -/goals - /execution -/executions -configuration - executable..${path.separator}R${path.separator}create-docs${script.extension}/executable -/configuration - /plugin -/plugins - /build -/profile /profiles /project http://git-wip-us.apache.org/repos/asf/spark/blob/3dc00528/dev/create-release/create-release.sh -- diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh index 0b14a61..54274a8 100755 --- a/dev/create-release/create-release.sh +++ b/dev/create-release/create-release.sh @@ -228,14 
+228,14 @@ if [[ ! $@ =~ --skip-package ]]; then # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds # share the same Zinc server. - make_binary_release hadoop1 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive-thriftserver 3030 - make_binary_release hadoop1-scala2.11 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Dscala-2.11 3031 - make_binary_release cdh4 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0 3032 - make_binary_release hadoop2.3 -Psparkr -Psparkr-docs -Phadoop-2.3 -Phive -Phive
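The Rscript one-liner added to install-dev.sh in both copies of this diff lost its shell quoting in the archive; unrolled and with quoting restored, the R it runs is:

```r
# Generate Rd files only when devtools is available, so a plain
# R CMD INSTALL still succeeds on machines without devtools/roxygen2
if ("devtools" %in% rownames(installed.packages())) {
  library(devtools)
  devtools::document(pkg = "./pkg", roclets = c("rd"))
}
```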
spark git commit: [SPARK-8482] Added M4 instances to the list.
Repository: spark Updated Branches: refs/heads/master 42a1f716f - ba8a4537f [SPARK-8482] Added M4 instances to the list. AWS recently added M4 instances (https://aws.amazon.com/blogs/aws/the-new-m4-instance-type-bonus-price-reduction-on-m3-c4/). Author: Pradeep Chhetri pradeep.chhetr...@gmail.com Closes #6899 from pradeepchhetri/master and squashes the following commits: 4f4ea79 [Pradeep Chhetri] Added t2.large instance 3d2bb6c [Pradeep Chhetri] Added M4 instances to the list Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ba8a4537 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ba8a4537 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ba8a4537 Branch: refs/heads/master Commit: ba8a4537fee7d85f968cccf8d1c607731daae307 Parents: 42a1f71 Author: Pradeep Chhetri pradeep.chhetr...@gmail.com Authored: Mon Jun 22 11:45:31 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Mon Jun 22 11:45:31 2015 -0700 -- ec2/spark_ec2.py | 16 ++-- 1 file changed, 14 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ba8a4537/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 1037356..63e2c79 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -362,7 +362,7 @@ def get_validate_spark_version(version, repo): # Source: http://aws.amazon.com/amazon-linux-ami/instance-type-matrix/ -# Last Updated: 2015-05-08 +# Last Updated: 2015-06-19 # For easy maintainability, please keep this manually-inputted dictionary sorted by key. EC2_INSTANCE_TYPES = { c1.medium: pvm, @@ -404,6 +404,11 @@ EC2_INSTANCE_TYPES = { m3.large:hvm, m3.xlarge: hvm, m3.2xlarge: hvm, +m4.large:hvm, +m4.xlarge: hvm, +m4.2xlarge: hvm, +m4.4xlarge: hvm, +m4.10xlarge: hvm, r3.large:hvm, r3.xlarge: hvm, r3.2xlarge: hvm, @@ -413,6 +418,7 @@ EC2_INSTANCE_TYPES = { t2.micro:hvm, t2.small:hvm, t2.medium: hvm, +t2.large:hvm, } @@ -923,7 +929,7 @@ def wait_for_cluster_state(conn, opts, cluster_instances, cluster_state): # Get number of local disks available for a given EC2 instance type. def get_num_disks(instance_type): # Source: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html -# Last Updated: 2015-05-08 +# Last Updated: 2015-06-19 # For easy maintainability, please keep this manually-inputted dictionary sorted by key. disks_by_instance = { c1.medium: 1, @@ -965,6 +971,11 @@ def get_num_disks(instance_type): m3.large:1, m3.xlarge: 2, m3.2xlarge: 2, +m4.large:0, +m4.xlarge: 0, +m4.2xlarge: 0, +m4.4xlarge: 0, +m4.10xlarge: 0, r3.large:1, r3.xlarge: 1, r3.2xlarge: 1, @@ -974,6 +985,7 @@ def get_num_disks(instance_type): t2.micro:0, t2.small:0, t2.medium: 0, +t2.large:0, } if instance_type in disks_by_instance: return disks_by_instance[instance_type] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8537] [SPARKR] Add a validation rule about the curly braces in SparkR to `.lintr`
Repository: spark Updated Branches: refs/heads/master afe35f051 - b1f3a489e [SPARK-8537] [SPARKR] Add a validation rule about the curly braces in SparkR to `.lintr` [[SPARK-8537] Add a validation rule about the curly braces in SparkR to `.lintr` - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-8537) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #6940 from yu-iskw/SPARK-8537 and squashes the following commits: 7eec1a0 [Yu ISHIKAWA] [SPARK-8537][SparkR] Add a validation rule about the curly braces in SparkR to `.lintr` Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b1f3a489 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b1f3a489 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b1f3a489 Branch: refs/heads/master Commit: b1f3a489efc6f4f9d172344c3345b9b38ae235e0 Parents: afe35f0 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Mon Jun 22 14:35:38 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Mon Jun 22 14:35:38 2015 -0700 -- R/pkg/.lintr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b1f3a489/R/pkg/.lintr -- diff --git a/R/pkg/.lintr b/R/pkg/.lintr index b10ebd3..038236f 100644 --- a/R/pkg/.lintr +++ b/R/pkg/.lintr @@ -1,2 +1,2 @@ -linters: with_defaults(line_length_linter(100), camel_case_linter = NULL) +linters: with_defaults(line_length_linter(100), camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE)) exclusions: list(inst/profile/general.R = 1, inst/profile/shell.R) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
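A hypothetical snippet (not from the patch) illustrating what the two added linters accept and reject:

```r
# Passes: "{" ends the line that opens a block, "}" starts its own line
logWith <- function(prefix) {
  function(msg) {
    cat(prefix, msg, "\n")
  }
}

# Also passes: allow_single_line = TRUE keeps one-line blocks legal
identityFn <- function(x) { x }

# Flagged by open_curly_linter: the opening brace sits alone on its line
# badFn <- function(x)
# {
#   x
# }
```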
spark git commit: [SPARK-8111] [SPARKR] SparkR shell should display Spark logo and version banner on startup.
Repository: spark Updated Branches: refs/heads/master f2022fa0d - f2fb0285a [SPARK-8111] [SPARKR] SparkR shell should display Spark logo and version banner on startup. spark version is taken from the environment variable SPARK_VERSION Author: Alok Singh singhal@Aloks-MacBook-Pro.local Author: Alok Singh sing...@aloks-mbp.usca.ibm.com Closes #6944 from aloknsingh/aloknsingh_spark_jiras and squashes the following commits: ed607bd [Alok Singh] [SPARK-8111][SparkR] As per suggestion, 1) using the version from sparkContext rather than the Sys.env. 2) change Welcome to SparkR! to Welcome to followed by Spark logo and version acd5b85 [Alok Singh] fix the jira SPARK-8111 to add the spark version and logo. Currently spark version is taken from the environment variable SPARK_VERSION Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f2fb0285 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f2fb0285 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f2fb0285 Branch: refs/heads/master Commit: f2fb0285ab6d4225c5350f109dea6c1c017bb491 Parents: f2022fa Author: Alok Singh singhal@Aloks-MacBook-Pro.local Authored: Tue Jun 23 12:47:55 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Jun 23 12:47:55 2015 -0700 -- R/pkg/inst/profile/shell.R | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f2fb0285/R/pkg/inst/profile/shell.R -- diff --git a/R/pkg/inst/profile/shell.R b/R/pkg/inst/profile/shell.R index 773b6ec..7189f1a 100644 --- a/R/pkg/inst/profile/shell.R +++ b/R/pkg/inst/profile/shell.R @@ -27,7 +27,21 @@ sc - SparkR::sparkR.init() assign(sc, sc, envir=.GlobalEnv) sqlContext - SparkR::sparkRSQL.init(sc) + sparkVer - SparkR:::callJMethod(sc, version) assign(sqlContext, sqlContext, envir=.GlobalEnv) - cat(\n Welcome to SparkR!) + cat(\n Welcome to) + cat(\n) + cat( __, \n) + cat( / __/__ ___ _/ /__, \n) + cat( _\\ \\/ _ \\/ _ `/ __/ '_/, \n) + cat( /___/ .__/\\_,_/_/ /_/\\_\\) + if (nchar(sparkVer) == 0) { +cat(\n) + } else { +cat( version , sparkVer, \n) + } + cat(/_/, \n) + cat(\n) + cat(\n Spark context is available as sc, SQL context is available as sqlContext\n) } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
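The version string comes from the JVM-side SparkContext rather than the SPARK_VERSION environment variable; the same call used in the patch works interactively too (callJMethod is a private API, hence the ::: access):

```r
sc <- sparkR.init()
sparkVer <- SparkR:::callJMethod(sc, "version")  # e.g. "1.5.0-SNAPSHOT"
```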
spark git commit: [SPARK-8452] [SPARKR] expose jobGroup API in SparkR
Repository: spark Updated Branches: refs/heads/master 54976e55e - 1fa29c2df [SPARK-8452] [SPARKR] expose jobGroup API in SparkR This pull request adds following methods to SparkR: ```R setJobGroup() cancelJobGroup() clearJobGroup() ``` For each method, the spark context is passed as the first argument. There does not seem to be a good way to test these in R. cc shivaram and davies Author: Hossein hoss...@databricks.com Closes #6889 from falaki/SPARK-8452 and squashes the following commits: 9ce9f1e [Hossein] Added basic tests to verify methods can be called and won't throw errors c706af9 [Hossein] Added examples a2c19af [Hossein] taking spark context as first argument 343ca77 [Hossein] Added setJobGroup, cancelJobGroup and clearJobGroup to SparkR Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1fa29c2d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1fa29c2d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1fa29c2d Branch: refs/heads/master Commit: 1fa29c2df2a7846405eed6b409b8deb5329fa7c1 Parents: 54976e5 Author: Hossein hoss...@databricks.com Authored: Fri Jun 19 15:47:22 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jun 19 15:51:59 2015 -0700 -- R/pkg/NAMESPACE | 5 R/pkg/R/sparkR.R| 44 R/pkg/inst/tests/test_context.R | 7 ++ 3 files changed, 56 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1fa29c2d/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index f9447f6..7f85722 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -10,6 +10,11 @@ export(sparkR.init) export(sparkR.stop) export(print.jobj) +# Job group lifecycle management methods +export(setJobGroup, + clearJobGroup, + cancelJobGroup) + exportClasses(DataFrame) exportMethods(arrange, http://git-wip-us.apache.org/repos/asf/spark/blob/1fa29c2d/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 5ced7c6..2efd4f0 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -278,3 +278,47 @@ sparkRHive.init - function(jsc = NULL) { assign(.sparkRHivesc, hiveCtx, envir = .sparkREnv) hiveCtx } + +#' Assigns a group ID to all the jobs started by this thread until the group ID is set to a +#' different value or cleared. 
+#' +#' @param sc existing spark context +#' @param groupid the ID to be assigned to job groups +#' @param description description for the the job group ID +#' @param interruptOnCancel flag to indicate if the job is interrupted on job cancellation +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' setJobGroup(sc, myJobGroup, My job group description, TRUE) +#'} + +setJobGroup - function(sc, groupId, description, interruptOnCancel) { + callJMethod(sc, setJobGroup, groupId, description, interruptOnCancel) +} + +#' Clear current job group ID and its description +#' +#' @param sc existing spark context +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' clearJobGroup(sc) +#'} + +clearJobGroup - function(sc) { + callJMethod(sc, clearJobGroup) +} + +#' Cancel active jobs for the specified group +#' +#' @param sc existing spark context +#' @param groupId the ID of job group to be cancelled +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' cancelJobGroup(sc, myJobGroup) +#'} + +cancelJobGroup - function(sc, groupId) { + callJMethod(sc, cancelJobGroup, groupId) +} http://git-wip-us.apache.org/repos/asf/spark/blob/1fa29c2d/R/pkg/inst/tests/test_context.R -- diff --git a/R/pkg/inst/tests/test_context.R b/R/pkg/inst/tests/test_context.R index e4aab37..513bbc8 100644 --- a/R/pkg/inst/tests/test_context.R +++ b/R/pkg/inst/tests/test_context.R @@ -48,3 +48,10 @@ test_that(rdd GC across sparkR.stop, { count(rdd3) count(rdd4) }) + +test_that(job group functions can be called, { + sc - sparkR.init() + setJobGroup(sc, groupId, job description, TRUE) + cancelJobGroup(sc, groupId) + clearJobGroup(sc) +}) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
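Pieced together from the roxygen examples above, a typical lifecycle for the three new functions looks like this:

```r
sc <- sparkR.init()

# Tag all jobs started on this thread; TRUE asks Spark to interrupt on cancel
setJobGroup(sc, "myJobGroup", "My job group description", TRUE)

# ... launch SparkR jobs here ...

# Cancel anything still running under the group, then drop the tag
cancelJobGroup(sc, "myJobGroup")
clearJobGroup(sc)
```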
spark git commit: [SPARK-8637] [SPARKR] [HOTFIX] Fix packages argument, sparkSubmitBinName
Repository: spark Updated Branches: refs/heads/branch-1.4 13802163d - 6abb4fc8a [SPARK-8637] [SPARKR] [HOTFIX] Fix packages argument, sparkSubmitBinName cc cafreeman Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #7022 from shivaram/sparkr-init-hotfix and squashes the following commits: 9178d15 [Shivaram Venkataraman] Fix packages argument, sparkSubmitBinName (cherry picked from commit c392a9efabcb1ec2a2c53f001ecdae33c245ba35) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6abb4fc8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6abb4fc8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6abb4fc8 Branch: refs/heads/branch-1.4 Commit: 6abb4fc8a426f2554158802dd93f3223b6e2a304 Parents: 1380216 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Thu Jun 25 10:56:00 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Thu Jun 25 10:56:08 2015 -0700 -- R/pkg/R/client.R | 2 +- R/pkg/R/sparkR.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6abb4fc8/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index cf2e5dd..78c7a30 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -57,7 +57,7 @@ generateSparkSubmitArgs - function(args, sparkHome, jars, sparkSubmitOpts, pack } launchBackend - function(args, sparkHome, jars, sparkSubmitOpts, packages) { - sparkSubmitBin - determineSparkSubmitBin() + sparkSubmitBinName - determineSparkSubmitBin() if (sparkHome != ) { sparkSubmitBin - file.path(sparkHome, bin, sparkSubmitBinName) } else { http://git-wip-us.apache.org/repos/asf/spark/blob/6abb4fc8/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 8f81d56..633b869 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -132,7 +132,7 @@ sparkR.init - function( sparkHome = sparkHome, jars = jars, sparkSubmitOpts = Sys.getenv(SPARKR_SUBMIT_ARGS, sparkr-shell), -sparkPackages = sparkPackages) +packages = sparkPackages) # wait atmost 100 seconds for JVM to launch wait - 0.1 for (i in 1:25) { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8637] [SPARKR] [HOTFIX] Fix packages argument, sparkSubmitBinName
Repository: spark Updated Branches: refs/heads/master 2519dcc33 - c392a9efa [SPARK-8637] [SPARKR] [HOTFIX] Fix packages argument, sparkSubmitBinName cc cafreeman Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #7022 from shivaram/sparkr-init-hotfix and squashes the following commits: 9178d15 [Shivaram Venkataraman] Fix packages argument, sparkSubmitBinName Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c392a9ef Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c392a9ef Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c392a9ef Branch: refs/heads/master Commit: c392a9efabcb1ec2a2c53f001ecdae33c245ba35 Parents: 2519dcc Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Thu Jun 25 10:56:00 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Thu Jun 25 10:56:00 2015 -0700 -- R/pkg/R/client.R | 2 +- R/pkg/R/sparkR.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c392a9ef/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index cf2e5dd..78c7a30 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -57,7 +57,7 @@ generateSparkSubmitArgs - function(args, sparkHome, jars, sparkSubmitOpts, pack } launchBackend - function(args, sparkHome, jars, sparkSubmitOpts, packages) { - sparkSubmitBin - determineSparkSubmitBin() + sparkSubmitBinName - determineSparkSubmitBin() if (sparkHome != ) { sparkSubmitBin - file.path(sparkHome, bin, sparkSubmitBinName) } else { http://git-wip-us.apache.org/repos/asf/spark/blob/c392a9ef/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 8f81d56..633b869 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -132,7 +132,7 @@ sparkR.init - function( sparkHome = sparkHome, jars = jars, sparkSubmitOpts = Sys.getenv(SPARKR_SUBMIT_ARGS, sparkr-shell), -sparkPackages = sparkPackages) +packages = sparkPackages) # wait atmost 100 seconds for JVM to launch wait - 0.1 for (i in 1:25) { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8429] [EC2] Add ability to set additional tags
Repository: spark Updated Branches: refs/heads/master 0818fdec3 - 42a1f716f [SPARK-8429] [EC2] Add ability to set additional tags Add the `--additional-tags` parameter that allows to set additional tags to all the created instances (masters and slaves). The user can specify multiple tags by separating them with a comma (`,`), while each tag name and value should be separated by a colon (`:`); for example, `Task:MySparkProject,Env:production` would add two tags, `Task` and `Env`, with the given values. Author: Stefano Parmesan s.parme...@gmail.com Closes #6857 from armisael/patch-1 and squashes the following commits: c5ac92c [Stefano Parmesan] python style (pep8) 8e614f1 [Stefano Parmesan] Set multiple tags in a single request bfc56af [Stefano Parmesan] Address SPARK-7900 by inceasing sleep time daf8615 [Stefano Parmesan] Add ability to set additional tags Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/42a1f716 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/42a1f716 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/42a1f716 Branch: refs/heads/master Commit: 42a1f716fa35533507784be5e9117a984a03e62d Parents: 0818fde Author: Stefano Parmesan s.parme...@gmail.com Authored: Mon Jun 22 11:43:10 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Mon Jun 22 11:43:10 2015 -0700 -- ec2/spark_ec2.py | 28 1 file changed, 20 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/42a1f716/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 5608749..1037356 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -290,6 +290,10 @@ def parse_args(): --additional-security-group, type=string, default=, help=Additional security group to place the machines in) parser.add_option( +--additional-tags, type=string, default=, +help=Additional tags to set on the machines; tags are comma-separated, while name and + + value are colon separated; ex: \Task:MySparkProject,Env:production\) +parser.add_option( --copy-aws-credentials, action=store_true, default=False, help=Add AWS credentials to hadoop configuration to allow Spark to access S3) parser.add_option( @@ -684,16 +688,24 @@ def launch_cluster(conn, opts, cluster_name): # This wait time corresponds to SPARK-4983 print(Waiting for AWS to propagate instance metadata...) -time.sleep(5) -# Give the instances descriptive names +time.sleep(15) + +# Give the instances descriptive names and set additional tags +additional_tags = {} +if opts.additional_tags.strip(): +additional_tags = dict( +map(str.strip, tag.split(':', 1)) for tag in opts.additional_tags.split(',') +) + for master in master_nodes: -master.add_tag( -key='Name', -value='{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id)) +master.add_tags( +dict(additional_tags, Name='{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id)) +) + for slave in slave_nodes: -slave.add_tag( -key='Name', -value='{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id)) +slave.add_tags( +dict(additional_tags, Name='{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id)) +) # Return all the instances return (master_nodes, slave_nodes) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8548] [SPARKR] Remove the trailing whitespaces from the SparkR files
Repository: spark Updated Branches: refs/heads/branch-1.4 d73900a90 - 250179485 [SPARK-8548] [SPARKR] Remove the trailing whitespaces from the SparkR files [[SPARK-8548] Remove the trailing whitespaces from the SparkR files - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-8548) - This is the result of `lint-r` https://gist.github.com/yu-iskw/0019b37a2c1167f33986 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #6945 from yu-iskw/SPARK-8548 and squashes the following commits: 0bd567a [Yu ISHIKAWA] [SPARK-8548][SparkR] Remove the trailing whitespaces from the SparkR files (cherry picked from commit 44fa7df64daa55bd6eb1f2c219a9701b34e1c2a3) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/25017948 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/25017948 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/25017948 Branch: refs/heads/branch-1.4 Commit: 250179485b59f3015fd2f44934b6cb1d3669de80 Parents: d73900a Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Mon Jun 22 20:55:38 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Mon Jun 22 20:55:55 2015 -0700 -- R/pkg/R/DataFrame.R | 96 ++-- R/pkg/R/RDD.R | 48 +++--- R/pkg/R/SQLContext.R| 14 ++-- R/pkg/R/broadcast.R | 6 +- R/pkg/R/deserialize.R | 2 +- R/pkg/R/generics.R | 15 ++--- R/pkg/R/group.R | 1 - R/pkg/R/jobj.R | 2 +- R/pkg/R/pairRDD.R | 4 +- R/pkg/R/schema.R| 2 +- R/pkg/R/serialize.R | 2 +- R/pkg/R/sparkR.R| 6 +- R/pkg/R/utils.R | 48 +++--- R/pkg/R/zzz.R | 1 - R/pkg/inst/tests/test_binaryFile.R | 7 +- R/pkg/inst/tests/test_binary_function.R | 28 R/pkg/inst/tests/test_rdd.R | 12 ++-- R/pkg/inst/tests/test_shuffle.R | 28 R/pkg/inst/tests/test_sparkSQL.R| 28 R/pkg/inst/tests/test_take.R| 1 - R/pkg/inst/tests/test_textFile.R| 7 +- R/pkg/inst/tests/test_utils.R | 12 ++-- 22 files changed, 182 insertions(+), 188 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/25017948/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 0af5cb8..6feabf4 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -38,7 +38,7 @@ setClass(DataFrame, setMethod(initialize, DataFrame, function(.Object, sdf, isCached) { .Object@env - new.env() .Object@env$isCached - isCached - + .Object@sdf - sdf .Object }) @@ -55,11 +55,11 @@ dataFrame - function(sdf, isCached = FALSE) { DataFrame Methods ## #' Print Schema of a DataFrame -#' +#' #' Prints out the schema in tree format -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname printSchema #' @export #' @examples @@ -78,11 +78,11 @@ setMethod(printSchema, }) #' Get schema object -#' +#' #' Returns the schema of this DataFrame as a structType object. -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname schema #' @export #' @examples @@ -100,9 +100,9 @@ setMethod(schema, }) #' Explain -#' +#' #' Print the logical and physical Catalyst plans to the console for debugging. -#' +#' #' @param x A SparkSQL DataFrame #' @param extended Logical. If extended is False, explain() only prints the physical plan. 
#' @rdname explain @@ -200,11 +200,11 @@ setMethod(show, DataFrame, }) #' DataTypes -#' +#' #' Return all column names and their data types as a list -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname dtypes #' @export #' @examples @@ -224,11 +224,11 @@ setMethod(dtypes, }) #' Column names -#' +#' #' Return all column names as a list -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname columns #' @export #' @examples @@ -256,12 +256,12 @@ setMethod(names, }) #' Register Temporary Table -#' +#' #' Registers a DataFrame as a Temporary Table in the SQLContext -#' +#' #' @param x A SparkSQL DataFrame #' @param tableName A character vector containing the name of the table -#' +#' #' @rdname registerTempTable #' @export #' @examples @@ -306,11 +306,11 @@ setMethod(insertInto, }) #' Cache -#' +#' #' Persist with the default storage level (MEMORY_ONLY). -#' +#' #' @param x
spark git commit: [SPARK-8548] [SPARKR] Remove the trailing whitespaces from the SparkR files
Repository: spark Updated Branches: refs/heads/master c4d234396 - 44fa7df64 [SPARK-8548] [SPARKR] Remove the trailing whitespaces from the SparkR files [[SPARK-8548] Remove the trailing whitespaces from the SparkR files - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-8548) - This is the result of `lint-r` https://gist.github.com/yu-iskw/0019b37a2c1167f33986 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #6945 from yu-iskw/SPARK-8548 and squashes the following commits: 0bd567a [Yu ISHIKAWA] [SPARK-8548][SparkR] Remove the trailing whitespaces from the SparkR files Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/44fa7df6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/44fa7df6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/44fa7df6 Branch: refs/heads/master Commit: 44fa7df64daa55bd6eb1f2c219a9701b34e1c2a3 Parents: c4d2343 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Mon Jun 22 20:55:38 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Mon Jun 22 20:55:38 2015 -0700 -- R/pkg/R/DataFrame.R | 96 ++-- R/pkg/R/RDD.R | 48 +++--- R/pkg/R/SQLContext.R| 14 ++-- R/pkg/R/broadcast.R | 6 +- R/pkg/R/deserialize.R | 2 +- R/pkg/R/generics.R | 15 ++--- R/pkg/R/group.R | 1 - R/pkg/R/jobj.R | 2 +- R/pkg/R/pairRDD.R | 4 +- R/pkg/R/schema.R| 2 +- R/pkg/R/serialize.R | 2 +- R/pkg/R/sparkR.R| 6 +- R/pkg/R/utils.R | 48 +++--- R/pkg/R/zzz.R | 1 - R/pkg/inst/tests/test_binaryFile.R | 7 +- R/pkg/inst/tests/test_binary_function.R | 28 R/pkg/inst/tests/test_rdd.R | 12 ++-- R/pkg/inst/tests/test_shuffle.R | 28 R/pkg/inst/tests/test_sparkSQL.R| 28 R/pkg/inst/tests/test_take.R| 1 - R/pkg/inst/tests/test_textFile.R| 7 +- R/pkg/inst/tests/test_utils.R | 12 ++-- 22 files changed, 182 insertions(+), 188 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/44fa7df6/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 0af5cb8..6feabf4 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -38,7 +38,7 @@ setClass(DataFrame, setMethod(initialize, DataFrame, function(.Object, sdf, isCached) { .Object@env - new.env() .Object@env$isCached - isCached - + .Object@sdf - sdf .Object }) @@ -55,11 +55,11 @@ dataFrame - function(sdf, isCached = FALSE) { DataFrame Methods ## #' Print Schema of a DataFrame -#' +#' #' Prints out the schema in tree format -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname printSchema #' @export #' @examples @@ -78,11 +78,11 @@ setMethod(printSchema, }) #' Get schema object -#' +#' #' Returns the schema of this DataFrame as a structType object. -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname schema #' @export #' @examples @@ -100,9 +100,9 @@ setMethod(schema, }) #' Explain -#' +#' #' Print the logical and physical Catalyst plans to the console for debugging. -#' +#' #' @param x A SparkSQL DataFrame #' @param extended Logical. If extended is False, explain() only prints the physical plan. 
#' @rdname explain @@ -200,11 +200,11 @@ setMethod(show, DataFrame, }) #' DataTypes -#' +#' #' Return all column names and their data types as a list -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname dtypes #' @export #' @examples @@ -224,11 +224,11 @@ setMethod(dtypes, }) #' Column names -#' +#' #' Return all column names as a list -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname columns #' @export #' @examples @@ -256,12 +256,12 @@ setMethod(names, }) #' Register Temporary Table -#' +#' #' Registers a DataFrame as a Temporary Table in the SQLContext -#' +#' #' @param x A SparkSQL DataFrame #' @param tableName A character vector containing the name of the table -#' +#' #' @rdname registerTempTable #' @export #' @examples @@ -306,11 +306,11 @@ setMethod(insertInto, }) #' Cache -#' +#' #' Persist with the default storage level (MEMORY_ONLY). -#' +#' #' @param x A SparkSQL DataFrame -#' +#' #' @rdname cache-methods #' @export #' @examples @@ -400,7 +400,7 @@ setMethod(repartition
spark git commit: [SPARK-8662] SparkR Update SparkSQL Test
Repository: spark Updated Branches: refs/heads/branch-1.4 6abb4fc8a - 78b31a2a6 [SPARK-8662] SparkR Update SparkSQL Test Test `infer_type` using a more fine-grained approach rather than comparing environments. Since `all.equal`'s behavior has changed in R 3.2, the test became unpassable. JIRA here: https://issues.apache.org/jira/browse/SPARK-8662 Author: cafreeman cfree...@alteryx.com Closes #7045 from cafreeman/R32_Test and squashes the following commits: b97cc52 [cafreeman] Add `checkStructField` utility 3381e5c [cafreeman] Update SparkSQL Test Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/78b31a2a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/78b31a2a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/78b31a2a Branch: refs/heads/branch-1.4 Commit: 78b31a2a630c2178987322d0221aeea183ec565f Parents: 6abb4fc Author: cafreeman cfree...@alteryx.com Authored: Fri Jun 26 10:07:35 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jun 26 10:07:35 2015 -0700 -- R/pkg/inst/tests/test_sparkSQL.R | 15 --- 1 file changed, 12 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/78b31a2a/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index fc7f3f0..52fb7f8 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -19,6 +19,14 @@ library(testthat) context(SparkSQL functions) +# Utility function for easily checking the values of a StructField +checkStructField - function(actual, expectedName, expectedType, expectedNullable) { + expect_equal(class(actual), structField) + expect_equal(actual$name(), expectedName) + expect_equal(actual$dataType.toString(), expectedType) + expect_equal(actual$nullable(), expectedNullable) +} + # Tests for SparkSQL functions in SparkR sc - sparkR.init() @@ -52,9 +60,10 @@ test_that(infer types, { list(type = 'array', elementType = integer, containsNull = TRUE)) expect_equal(infer_type(list(1L, 2L)), list(type = 'array', elementType = integer, containsNull = TRUE)) - expect_equal(infer_type(list(a = 1L, b = 2)), - structType(structField(x = a, type = integer, nullable = TRUE), - structField(x = b, type = string, nullable = TRUE))) + testStruct - infer_type(list(a = 1L, b = 2)) + expect_true(class(testStruct) == structType) + checkStructField(testStruct$fields()[[1]], a, IntegerType, TRUE) + checkStructField(testStruct$fields()[[2]], b, StringType, TRUE) e - new.env() assign(a, 1L, envir = e) expect_equal(infer_type(e), - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8662] SparkR Update SparkSQL Test
Repository: spark Updated Branches: refs/heads/master 41afa1650 - a56516fc9 [SPARK-8662] SparkR Update SparkSQL Test Test `infer_type` using a more fine-grained approach rather than comparing environments. Since `all.equal`'s behavior has changed in R 3.2, the test became unpassable. JIRA here: https://issues.apache.org/jira/browse/SPARK-8662 Author: cafreeman cfree...@alteryx.com Closes #7045 from cafreeman/R32_Test and squashes the following commits: b97cc52 [cafreeman] Add `checkStructField` utility 3381e5c [cafreeman] Update SparkSQL Test (cherry picked from commit 78b31a2a630c2178987322d0221aeea183ec565f) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a56516fc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a56516fc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a56516fc Branch: refs/heads/master Commit: a56516fc9280724db8fdef8e7d109ed7e28e427d Parents: 41afa16 Author: cafreeman cfree...@alteryx.com Authored: Fri Jun 26 10:07:35 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jun 26 10:07:49 2015 -0700 -- R/pkg/inst/tests/test_sparkSQL.R | 15 --- 1 file changed, 12 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a56516fc/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 417153d..6a08f89 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -19,6 +19,14 @@ library(testthat) context(SparkSQL functions) +# Utility function for easily checking the values of a StructField +checkStructField - function(actual, expectedName, expectedType, expectedNullable) { + expect_equal(class(actual), structField) + expect_equal(actual$name(), expectedName) + expect_equal(actual$dataType.toString(), expectedType) + expect_equal(actual$nullable(), expectedNullable) +} + # Tests for SparkSQL functions in SparkR sc - sparkR.init() @@ -52,9 +60,10 @@ test_that(infer types, { list(type = 'array', elementType = integer, containsNull = TRUE)) expect_equal(infer_type(list(1L, 2L)), list(type = 'array', elementType = integer, containsNull = TRUE)) - expect_equal(infer_type(list(a = 1L, b = 2)), - structType(structField(x = a, type = integer, nullable = TRUE), - structField(x = b, type = string, nullable = TRUE))) + testStruct - infer_type(list(a = 1L, b = 2)) + expect_true(class(testStruct) == structType) + checkStructField(testStruct$fields()[[1]], a, IntegerType, TRUE) + checkStructField(testStruct$fields()[[2]], b, StringType, TRUE) e - new.env() assign(a, 1L, envir = e) expect_equal(infer_type(e), - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
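The rewrite works because a structField exposes plain accessors whose values are stable to compare, unlike the environment-backed objects that all.equal mishandles under R 3.2. The accessors the new utility calls, shown directly:

```r
s <- structType(structField("a", "integer"), structField("b", "string"))
f <- s$fields()[[1]]
f$name()               # "a"
f$dataType.toString()  # "IntegerType"
f$nullable()           # TRUE
```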
spark git commit: [SPARK-8495] [SPARKR] Add a `.lintr` file to validate the SparkR files and the `lint-r` script
Repository: spark Updated Branches: refs/heads/master 7a3c424ec - 004f57374 [SPARK-8495] [SPARKR] Add a `.lintr` file to validate the SparkR files and the `lint-r` script Thank Shivaram Venkataraman for your support. This is a prototype script to validate the R files. Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #6922 from yu-iskw/SPARK-6813 and squashes the following commits: c1ffe6b [Yu ISHIKAWA] Modify to save result to a log file and add a rule to validate 5520806 [Yu ISHIKAWA] Exclude the .lintr file not to check Apache lincence 8f94680 [Yu ISHIKAWA] [SPARK-8495][SparkR] Add a `.lintr` file to validate the SparkR files and the `lint-r` script Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/004f5737 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/004f5737 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/004f5737 Branch: refs/heads/master Commit: 004f57374b98c4df32d9f1e19221f68e92639a49 Parents: 7a3c424 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Sat Jun 20 16:10:14 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sat Jun 20 16:10:14 2015 -0700 -- .gitignore| 1 + .rat-excludes | 1 + R/pkg/.lintr | 2 ++ dev/lint-r| 30 ++ dev/lint-r.R | 29 + 5 files changed, 63 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/004f5737/.gitignore -- diff --git a/.gitignore b/.gitignore index 3624d12..debad77 100644 --- a/.gitignore +++ b/.gitignore @@ -66,6 +66,7 @@ scalastyle-output.xml R-unit-tests.log R/unit-tests.out python/lib/pyspark.zip +lint-r-report.log # For Hive metastore_db/ http://git-wip-us.apache.org/repos/asf/spark/blob/004f5737/.rat-excludes -- diff --git a/.rat-excludes b/.rat-excludes index aa008e6..c24667c 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -86,3 +86,4 @@ local-1430917381535_2 DESCRIPTION NAMESPACE test_support/* +.lintr http://git-wip-us.apache.org/repos/asf/spark/blob/004f5737/R/pkg/.lintr -- diff --git a/R/pkg/.lintr b/R/pkg/.lintr new file mode 100644 index 000..b10ebd3 --- /dev/null +++ b/R/pkg/.lintr @@ -0,0 +1,2 @@ +linters: with_defaults(line_length_linter(100), camel_case_linter = NULL) +exclusions: list(inst/profile/general.R = 1, inst/profile/shell.R) http://git-wip-us.apache.org/repos/asf/spark/blob/004f5737/dev/lint-r -- diff --git a/dev/lint-r b/dev/lint-r new file mode 100755 index 000..7d5f4cd --- /dev/null +++ b/dev/lint-r @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the License); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +SCRIPT_DIR=$( cd $( dirname $0 ) pwd ) +SPARK_ROOT_DIR=$(dirname $SCRIPT_DIR) +LINT_R_REPORT_FILE_NAME=$SPARK_ROOT_DIR/dev/lint-r-report.log + + +if ! 
type Rscript /dev/null; then + echo ERROR: You should install R + exit +fi + +`which Rscript` --vanilla $SPARK_ROOT_DIR/dev/lint-r.R $SPARK_ROOT_DIR | tee $LINT_R_REPORT_FILE_NAME http://git-wip-us.apache.org/repos/asf/spark/blob/004f5737/dev/lint-r.R -- diff --git a/dev/lint-r.R b/dev/lint-r.R new file mode 100644 index 000..dcb1a18 --- /dev/null +++ b/dev/lint-r.R @@ -0,0 +1,29 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the License); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under
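The body of dev/lint-r.R is cut off above just after its license header; the essential shape of such a driver is only a few lines. A sketch under that assumption (the real script's argument handling and lintr bootstrap may differ):

```r
# Lint the SparkR package rooted at the directory passed by dev/lint-r
argv <- commandArgs(trailingOnly = TRUE)
SPARK_ROOT_DIR <- as.character(argv[1])

library(lintr)
path.to.package <- file.path(SPARK_ROOT_DIR, "R", "pkg")
lint_package(path.to.package)  # R/pkg/.lintr supplies the linter config
```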
spark git commit: [SPARK-8576] Add spark-ec2 options to set IAM roles and instance-initiated shutdown behavior
Repository: spark Updated Branches: refs/heads/master bba6699d0 - 31f48e5af [SPARK-8576] Add spark-ec2 options to set IAM roles and instance-initiated shutdown behavior Both of these options are useful when spark-ec2 is being used as part of an automated pipeline and the engineers want to minimize the need to pass around AWS keys for access to things like S3 (keys are replaced by the IAM role) and to be able to launch a cluster that can terminate itself cleanly. Author: Nicholas Chammas nicholas.cham...@gmail.com Closes #6962 from nchammas/additional-ec2-options and squashes the following commits: fcf252e [Nicholas Chammas] PEP8 fixes efba9ee [Nicholas Chammas] add help for --instance-initiated-shutdown-behavior 598aecf [Nicholas Chammas] option to launch instances into IAM role 2743632 [Nicholas Chammas] add option for instance initiated shutdown Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/31f48e5a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/31f48e5a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/31f48e5a Branch: refs/heads/master Commit: 31f48e5af887a9ccc9cea0218c36bf52bbf49d24 Parents: bba6699 Author: Nicholas Chammas nicholas.cham...@gmail.com Authored: Wed Jun 24 11:20:51 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Jun 24 11:20:51 2015 -0700 -- ec2/spark_ec2.py | 56 --- 1 file changed, 35 insertions(+), 21 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/31f48e5a/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 63e2c79..e4932cf 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -306,6 +306,13 @@ def parse_args(): --private-ips, action=store_true, default=False, help=Use private IPs for instances rather than public if VPC/subnet + requires that.) 
+parser.add_option( +--instance-initiated-shutdown-behavior, default=stop, +choices=[stop, terminate], +help=Whether instances should terminate when shut down or just stop) +parser.add_option( +--instance-profile-name, default=None, +help=IAM profile name to launch instances under) (opts, args) = parser.parse_args() if len(args) != 2: @@ -602,7 +609,8 @@ def launch_cluster(conn, opts, cluster_name): block_device_map=block_map, subnet_id=opts.subnet_id, placement_group=opts.placement_group, -user_data=user_data_content) +user_data=user_data_content, +instance_profile_name=opts.instance_profile_name) my_req_ids += [req.id for req in slave_reqs] i += 1 @@ -647,16 +655,19 @@ def launch_cluster(conn, opts, cluster_name): for zone in zones: num_slaves_this_zone = get_partition(opts.slaves, num_zones, i) if num_slaves_this_zone 0: -slave_res = image.run(key_name=opts.key_pair, - security_group_ids=[slave_group.id] + additional_group_ids, - instance_type=opts.instance_type, - placement=zone, - min_count=num_slaves_this_zone, - max_count=num_slaves_this_zone, - block_device_map=block_map, - subnet_id=opts.subnet_id, - placement_group=opts.placement_group, - user_data=user_data_content) +slave_res = image.run( +key_name=opts.key_pair, +security_group_ids=[slave_group.id] + additional_group_ids, +instance_type=opts.instance_type, +placement=zone, +min_count=num_slaves_this_zone, +max_count=num_slaves_this_zone, +block_device_map=block_map, +subnet_id=opts.subnet_id, +placement_group=opts.placement_group, +user_data=user_data_content, + instance_initiated_shutdown_behavior=opts.instance_initiated_shutdown_behavior, +instance_profile_name=opts.instance_profile_name) slave_nodes += slave_res.instances print(Launched {s} slave{plural_s} in {z}, regid = {r}.format( s=num_slaves_this_zone, @@ -678,16 +689,19 @@ def launch_cluster(conn, opts, cluster_name): master_type = opts.instance_type if opts.zone
spark git commit: [SPARK-8506] Add packages to R context created through init.
Repository: spark Updated Branches: refs/heads/master 1173483f3 - 43e66192f [SPARK-8506] Add pakages to R context created through init. Author: Holden Karau hol...@pigscanfly.ca Closes #6928 from holdenk/SPARK-8506-sparkr-does-not-provide-an-easy-way-to-depend-on-spark-packages-when-performing-init-from-inside-of-r and squashes the following commits: b60dd63 [Holden Karau] Add an example with the spark-csv package fa8bc92 [Holden Karau] typo: sparm - spark 865a90c [Holden Karau] strip spaces for comparision c7a4471 [Holden Karau] Add some documentation c1a9233 [Holden Karau] refactor for testing c818556 [Holden Karau] Add pakages to R Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/43e66192 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/43e66192 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/43e66192 Branch: refs/heads/master Commit: 43e66192f45a23f7232116e9f664158862df5015 Parents: 1173483 Author: Holden Karau hol...@pigscanfly.ca Authored: Wed Jun 24 11:55:20 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Jun 24 11:55:20 2015 -0700 -- R/pkg/R/client.R | 26 +++--- R/pkg/R/sparkR.R | 7 +-- R/pkg/inst/tests/test_client.R | 32 docs/sparkr.md | 17 + 4 files changed, 69 insertions(+), 13 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/43e66192/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index 1281c41..cf2e5dd 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -34,24 +34,36 @@ connectBackend - function(hostname, port, timeout = 6000) { con } -launchBackend - function(args, sparkHome, jars, sparkSubmitOpts) { +determineSparkSubmitBin - function() { if (.Platform$OS.type == unix) { sparkSubmitBinName = spark-submit } else { sparkSubmitBinName = spark-submit.cmd } + sparkSubmitBinName +} + +generateSparkSubmitArgs - function(args, sparkHome, jars, sparkSubmitOpts, packages) { + if (jars != ) { +jars - paste(--jars, jars) + } + + if (packages != ) { +packages - paste(--packages, packages) + } + combinedArgs - paste(jars, packages, sparkSubmitOpts, args, sep = ) + combinedArgs +} + +launchBackend - function(args, sparkHome, jars, sparkSubmitOpts, packages) { + sparkSubmitBin - determineSparkSubmitBin() if (sparkHome != ) { sparkSubmitBin - file.path(sparkHome, bin, sparkSubmitBinName) } else { sparkSubmitBin - sparkSubmitBinName } - - if (jars != ) { -jars - paste(--jars, jars) - } - - combinedArgs - paste(jars, sparkSubmitOpts, args, sep = ) + combinedArgs - generateSparkSubmitArgs(args, sparkHome, jars, sparkSubmitOpts, packages) cat(Launching java with spark-submit command, sparkSubmitBin, combinedArgs, \n) invisible(system2(sparkSubmitBin, combinedArgs, wait = F)) } http://git-wip-us.apache.org/repos/asf/spark/blob/43e66192/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index dbde0c4..8f81d56 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -81,6 +81,7 @@ sparkR.stop - function() { #' @param sparkExecutorEnv Named list of environment variables to be used when launching executors. #' @param sparkJars Character string vector of jar files to pass to the worker nodes. #' @param sparkRLibDir The path where R is installed on the worker nodes. 
+#' @param sparkPackages Character string vector of packages from spark-packages.org
 #' @export
 #' @examples
 #'\dontrun{
@@ -100,7 +101,8 @@ sparkR.init <- function(
   sparkEnvir = list(),
   sparkExecutorEnv = list(),
   sparkJars = "",
-  sparkRLibDir = "") {
+  sparkRLibDir = "",
+  sparkPackages = "") {

   if (exists(".sparkRjsc", envir = .sparkREnv)) {
     cat("Re-using existing Spark Context. Please stop SparkR with sparkR.stop() or restart R to create a new Spark Context\n")
@@ -129,7 +131,8 @@ sparkR.init <- function(
     args = path,
     sparkHome = sparkHome,
     jars = jars,
-    sparkSubmitOpts = Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"))
+    sparkSubmitOpts = Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"),
+    sparkPackages = sparkPackages)

   # wait atmost 100 seconds for JVM to launch
   wait <- 0.1
   for (i in 1:25) {

http://git-wip-us.apache.org/repos/asf/spark/blob/43e66192/R/pkg/inst/tests/test_client.R
--
diff --git a/R/pkg/inst/tests/test_client.R b/R/pkg/inst/tests/test_client.R
new file mode 100644
index 000..30b05c1
--- /dev/null
+++ b/R/pkg/inst/tests/test_client.R
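For readers skimming the diff, a minimal usage sketch of the new argument from the R side. It is hedged: the spark-csv coordinates below are only illustrative (the commit's own example uses that package), and any group:artifact:version published on spark-packages.org would be passed the same way.

library(SparkR)

# Start SparkR with an external Spark package on the classpath.
# "com.databricks:spark-csv_2.10:1.0.3" is an illustrative coordinate string.
sc <- sparkR.init(master = "local[2]",
                  sparkPackages = "com.databricks:spark-csv_2.10:1.0.3")

# Internally, generateSparkSubmitArgs() turns this into a --packages flag for
# spark-submit, mirroring how sparkJars becomes --jars.
sparkR.stop()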
spark git commit: [SPARK-8506] Add packages to R context created through init.
Repository: spark Updated Branches: refs/heads/branch-1.4 7e53ff258 -> f6682dd6e

[SPARK-8506] Add packages to R context created through init.

Author: Holden Karau <hol...@pigscanfly.ca>

Closes #6928 from holdenk/SPARK-8506-sparkr-does-not-provide-an-easy-way-to-depend-on-spark-packages-when-performing-init-from-inside-of-r and squashes the following commits:

b60dd63 [Holden Karau] Add an example with the spark-csv package
fa8bc92 [Holden Karau] typo: sparm - spark
865a90c [Holden Karau] strip spaces for comparision
c7a4471 [Holden Karau] Add some documentation
c1a9233 [Holden Karau] refactor for testing
c818556 [Holden Karau] Add pakages to R

(cherry picked from commit 43e66192f45a23f7232116e9f664158862df5015)
Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f6682dd6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f6682dd6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f6682dd6

Branch: refs/heads/branch-1.4
Commit: f6682dd6e8ab8c5acddd1cf20317bea3afcbcae7
Parents: 7e53ff2
Author: Holden Karau <hol...@pigscanfly.ca>
Authored: Wed Jun 24 11:55:20 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Wed Jun 24 11:55:29 2015 -0700

--
 R/pkg/R/client.R               | 26 +++---
 R/pkg/R/sparkR.R               |  7 +--
 R/pkg/inst/tests/test_client.R | 32 
 docs/sparkr.md                 | 17 +
 4 files changed, 69 insertions(+), 13 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f6682dd6/R/pkg/R/client.R
--
diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R
index 1281c41..cf2e5dd 100644
--- a/R/pkg/R/client.R
+++ b/R/pkg/R/client.R
@@ -34,24 +34,36 @@ connectBackend <- function(hostname, port, timeout = 6000) {
   con
 }

-launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts) {
+determineSparkSubmitBin <- function() {
   if (.Platform$OS.type == "unix") {
     sparkSubmitBinName = "spark-submit"
   } else {
     sparkSubmitBinName = "spark-submit.cmd"
   }
+  sparkSubmitBinName
+}
+
+generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, packages) {
+  if (jars != "") {
+    jars <- paste("--jars", jars)
+  }
+
+  if (packages != "") {
+    packages <- paste("--packages", packages)
+  }
+  combinedArgs <- paste(jars, packages, sparkSubmitOpts, args, sep = " ")
+  combinedArgs
+}
+
+launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) {
+  sparkSubmitBin <- determineSparkSubmitBin()
   if (sparkHome != "") {
     sparkSubmitBin <- file.path(sparkHome, "bin", sparkSubmitBinName)
   } else {
     sparkSubmitBin <- sparkSubmitBinName
   }
-
-  if (jars != "") {
-    jars <- paste("--jars", jars)
-  }
-
-  combinedArgs <- paste(jars, sparkSubmitOpts, args, sep = " ")
+  combinedArgs <- generateSparkSubmitArgs(args, sparkHome, jars, sparkSubmitOpts, packages)
   cat("Launching java with spark-submit command", sparkSubmitBin, combinedArgs, "\n")
   invisible(system2(sparkSubmitBin, combinedArgs, wait = F))
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/f6682dd6/R/pkg/R/sparkR.R
--
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index dbde0c4..8f81d56 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -81,6 +81,7 @@ sparkR.stop <- function() {
 #' @param sparkExecutorEnv Named list of environment variables to be used when launching executors.
 #' @param sparkJars Character string vector of jar files to pass to the worker nodes.
 #' @param sparkRLibDir The path where R is installed on the worker nodes.
+#' @param sparkPackages Character string vector of packages from spark-packages.org
 #' @export
 #' @examples
 #'\dontrun{
@@ -100,7 +101,8 @@ sparkR.init <- function(
   sparkEnvir = list(),
   sparkExecutorEnv = list(),
   sparkJars = "",
-  sparkRLibDir = "") {
+  sparkRLibDir = "",
+  sparkPackages = "") {

   if (exists(".sparkRjsc", envir = .sparkREnv)) {
     cat("Re-using existing Spark Context. Please stop SparkR with sparkR.stop() or restart R to create a new Spark Context\n")
@@ -129,7 +131,8 @@ sparkR.init <- function(
     args = path,
     sparkHome = sparkHome,
     jars = jars,
-    sparkSubmitOpts = Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"))
+    sparkSubmitOpts = Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"),
+    sparkPackages = sparkPackages)

   # wait atmost 100 seconds for JVM to launch
   wait <- 0.1
   for (i in 1:25) {

http://git-wip-us.apache.org/repos/asf/spark/blob/f6682dd6/R/pkg/inst/tests/test_client.R
--
diff --git a/R/pkg/inst/tests/test_client.R b/R/pkg/inst/tests/test_client.R
new file mode 100644
index 000..30b05c1
--- /dev/null
+++ b/R/pkg/inst/tests
spark git commit: [SPARK-8607] SparkR -- jars not being added to application classpath correctly
Repository: spark Updated Branches: refs/heads/master a56516fc9 - 9d1181776 [SPARK-8607] SparkR -- jars not being added to application classpath correctly Add `getStaticClass` method in SparkR's `RBackendHandler` This is a fix for the problem referenced in [SPARK-5185](https://issues.apache.org/jira/browse/SPARK-5185). cc shivaram Author: cafreeman cfree...@alteryx.com Closes #7001 from cafreeman/branch-1.4 and squashes the following commits: 8f81194 [cafreeman] Add missing license 31aedcf [cafreeman] Refactor test to call an external R script 2c22073 [cafreeman] Merge branch 'branch-1.4' of github.com:apache/spark into branch-1.4 0bea809 [cafreeman] Fixed relative path issue and added smaller JAR ee25e60 [cafreeman] Merge branch 'branch-1.4' of github.com:apache/spark into branch-1.4 9a5c362 [cafreeman] test for including JAR when launching sparkContext 9101223 [cafreeman] Merge branch 'branch-1.4' of github.com:apache/spark into branch-1.4 5a80844 [cafreeman] Fix style nits 7c6bd0c [cafreeman] [SPARK-8607] SparkR (cherry picked from commit 2579948bf5d89ac2d822ace605a6a4afce5258d6) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9d118177 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9d118177 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9d118177 Branch: refs/heads/master Commit: 9d11817765e2817b11b73c61bae3b32c9f119cfd Parents: a56516f Author: cafreeman cfree...@alteryx.com Authored: Fri Jun 26 17:06:02 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jun 26 17:06:16 2015 -0700 -- .../inst/test_support/sparktestjar_2.10-1.0.jar | Bin 0 - 2886 bytes R/pkg/inst/tests/jarTest.R | 32 R/pkg/inst/tests/test_includeJAR.R | 37 +++ .../apache/spark/api/r/RBackendHandler.scala| 17 - 4 files changed, 85 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9d118177/R/pkg/inst/test_support/sparktestjar_2.10-1.0.jar -- diff --git a/R/pkg/inst/test_support/sparktestjar_2.10-1.0.jar b/R/pkg/inst/test_support/sparktestjar_2.10-1.0.jar new file mode 100644 index 000..1d5c2af Binary files /dev/null and b/R/pkg/inst/test_support/sparktestjar_2.10-1.0.jar differ http://git-wip-us.apache.org/repos/asf/spark/blob/9d118177/R/pkg/inst/tests/jarTest.R -- diff --git a/R/pkg/inst/tests/jarTest.R b/R/pkg/inst/tests/jarTest.R new file mode 100644 index 000..d68bb20 --- /dev/null +++ b/R/pkg/inst/tests/jarTest.R @@ -0,0 +1,32 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the License); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+library(SparkR)
+
+sc <- sparkR.init()
+
+helloTest <- SparkR:::callJStatic("sparkR.test.hello",
+                                  "helloWorld",
+                                  "Dave")
+
+basicFunction <- SparkR:::callJStatic("sparkR.test.basicFunction",
+                                      "addStuff",
+                                      2L,
+                                      2L)
+
+sparkR.stop()
+output <- c(helloTest, basicFunction)
+writeLines(output)

http://git-wip-us.apache.org/repos/asf/spark/blob/9d118177/R/pkg/inst/tests/test_includeJAR.R
--
diff --git a/R/pkg/inst/tests/test_includeJAR.R b/R/pkg/inst/tests/test_includeJAR.R
new file mode 100644
index 000..8bc693b
--- /dev/null
+++ b/R/pkg/inst/tests/test_includeJAR.R
@@ -0,0 +1,37 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License
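From the user's side, the behavior exercised by the new test looks roughly like the sketch below. The JAR path is illustrative, the sparkR.test.* class names exist only in the test JAR added by this commit, and callJStatic is an internal (triple-colon) helper rather than public API.

library(SparkR)

# Launch SparkR with an extra JAR on the application classpath.
sc <- sparkR.init(sparkJars = "path/to/sparktestjar_2.10-1.0.jar")  # illustrative path

# Call a static method from the bundled JAR, as jarTest.R does above.
greeting <- SparkR:::callJStatic("sparkR.test.hello", "helloWorld", "Dave")

sparkR.stop()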
spark git commit: [SPARK-8607] SparkR -- jars not being added to application classpath correctly
Repository: spark Updated Branches: refs/heads/branch-1.4 78b31a2a6 - 2579948bf [SPARK-8607] SparkR -- jars not being added to application classpath correctly Add `getStaticClass` method in SparkR's `RBackendHandler` This is a fix for the problem referenced in [SPARK-5185](https://issues.apache.org/jira/browse/SPARK-5185). cc shivaram Author: cafreeman cfree...@alteryx.com Closes #7001 from cafreeman/branch-1.4 and squashes the following commits: 8f81194 [cafreeman] Add missing license 31aedcf [cafreeman] Refactor test to call an external R script 2c22073 [cafreeman] Merge branch 'branch-1.4' of github.com:apache/spark into branch-1.4 0bea809 [cafreeman] Fixed relative path issue and added smaller JAR ee25e60 [cafreeman] Merge branch 'branch-1.4' of github.com:apache/spark into branch-1.4 9a5c362 [cafreeman] test for including JAR when launching sparkContext 9101223 [cafreeman] Merge branch 'branch-1.4' of github.com:apache/spark into branch-1.4 5a80844 [cafreeman] Fix style nits 7c6bd0c [cafreeman] [SPARK-8607] SparkR Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2579948b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2579948b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2579948b Branch: refs/heads/branch-1.4 Commit: 2579948bf5d89ac2d822ace605a6a4afce5258d6 Parents: 78b31a2 Author: cafreeman cfree...@alteryx.com Authored: Fri Jun 26 17:06:02 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jun 26 17:06:02 2015 -0700 -- .../inst/test_support/sparktestjar_2.10-1.0.jar | Bin 0 - 2886 bytes R/pkg/inst/tests/jarTest.R | 32 R/pkg/inst/tests/test_includeJAR.R | 37 +++ .../apache/spark/api/r/RBackendHandler.scala| 17 - 4 files changed, 85 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2579948b/R/pkg/inst/test_support/sparktestjar_2.10-1.0.jar -- diff --git a/R/pkg/inst/test_support/sparktestjar_2.10-1.0.jar b/R/pkg/inst/test_support/sparktestjar_2.10-1.0.jar new file mode 100644 index 000..1d5c2af Binary files /dev/null and b/R/pkg/inst/test_support/sparktestjar_2.10-1.0.jar differ http://git-wip-us.apache.org/repos/asf/spark/blob/2579948b/R/pkg/inst/tests/jarTest.R -- diff --git a/R/pkg/inst/tests/jarTest.R b/R/pkg/inst/tests/jarTest.R new file mode 100644 index 000..d68bb20 --- /dev/null +++ b/R/pkg/inst/tests/jarTest.R @@ -0,0 +1,32 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the License); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +library(SparkR) + +sc - sparkR.init() + +helloTest - SparkR:::callJStatic(sparkR.test.hello, + helloWorld, + Dave) + +basicFunction - SparkR:::callJStatic(sparkR.test.basicFunction, + addStuff, + 2L, + 2L) + +sparkR.stop() +output - c(helloTest, basicFunction) +writeLines(output) http://git-wip-us.apache.org/repos/asf/spark/blob/2579948b/R/pkg/inst/tests/test_includeJAR.R -- diff --git a/R/pkg/inst/tests/test_includeJAR.R b/R/pkg/inst/tests/test_includeJAR.R new file mode 100644 index 000..8bc693b --- /dev/null +++ b/R/pkg/inst/tests/test_includeJAR.R @@ -0,0 +1,37 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the License); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software
svn commit: r1684946 - in /spark: releases/_posts/2015-06-11-spark-release-1-4-0.md site/releases/spark-release-1-4-0.html
Author: shivaram Date: Thu Jun 11 16:59:48 2015 New Revision: 1684946 URL: http://svn.apache.org/r1684946 Log: Add some more SparkR contributors Modified: spark/releases/_posts/2015-06-11-spark-release-1-4-0.md spark/site/releases/spark-release-1-4-0.html Modified: spark/releases/_posts/2015-06-11-spark-release-1-4-0.md URL: http://svn.apache.org/viewvc/spark/releases/_posts/2015-06-11-spark-release-1-4-0.md?rev=1684946r1=1684945r2=1684946view=diff == --- spark/releases/_posts/2015-06-11-spark-release-1-4-0.md (original) +++ spark/releases/_posts/2015-06-11-spark-release-1-4-0.md Thu Jun 11 16:59:48 2015 @@ -99,7 +99,9 @@ Thanks to The following organizations, w * Andrew Or -- Bug fixes in Core * Andrew Or -- Improvements in Core and YARN; bug fixes in Core, Web UI, Streaming, tests, and SQL; improvement in Streaming, Web UI, Core, and SQL * Andrey Zagrebin -- Improvement in SQL + * Antonio Piccolboni -- New features in SparkR * Arsenii Krasikov -- Bug fixes in Core + * Ashutosh Raina -- New features in SparkR * Ashwin Shankar -- Bug fixes in YARN * Augustin Borsu -- New features in MLlib * Ben Fradet -- Documentation in Core and Streaming @@ -115,6 +117,7 @@ Thanks to The following organizations, w * Cheng Lian -- Bug fixes in SQL * Cheng Lian -- Improvements in Core and SQL; documentation in Core and SQL; bug fixes in Core and SQL; improvement in SQL * Cheolsoo Park -- Wish in YARN; improvements in Core and spark submit; bug fixes in Core + * Chris Freeman -- New features in SparkR * Chet Mancini -- Improvements in Core and SQL * Chris Heller -- New features in Mesos * Christophe Preaud -- Documentation in Core and YARN @@ -122,23 +125,27 @@ Thanks to The following organizations, w * DB Tsai -- Improvements, new features, and bug fixes in MLlib * DEBORAH SIEGEL -- Documentation in Core * Dan McClary -- New features in GraphX + * Dan Putler -- New features in SparkR * Daoyuan Wang -- Improvements in tests and SQL; new features in SQL; bug fixes in SQL; improvement in MLlib and SQL * David McGuire -- Bug fixes in Streaming - * Davies Liu -- Improvements in SQL and PySpark; new features in Core and sparkr; bug fixes in Streaming, tests, PySpark, sparkr, and SQL; improvement in Core and SQL - * Davies Liu -- New features in sparkr + * Davies Liu -- Improvements in SQL and PySpark; new features in Core and SparkR; bug fixes in Streaming, tests, PySpark, SparkR, and SQL; improvement in Core and SQL + * Davies Liu -- New features in SparkR * Dean Chen -- Improvements in Core; new features in YARN; bug fixes in Core and YARN * Debasish Das -- New features in MLlib * Deborah Siegel -- Improvements in Core * Doing Done -- Improvements in SQL; bug fixes in Core and SQL * Dong Xu -- Bug fixes in SQL * Doug Balog -- Bug fixes in spark submit, YARN, and SQL + * Edward T -- New features in SparkR * Elisey Zanko -- Bug fixes in MLlib and PySpark * Emre Sevinc -- Improvements in Streaming * Eric Chiang -- Documentation in Core * Erik Van Oosten -- Bug fixes in Core * Evan Jones -- Bug fixes in Core * Evan Yu -- Bug fixes in Core + * Evert Lammerts -- New features in SparkR * Favio Vazquez -- Build fixes in Core; documentation in Core and MLlib + * Felix Cheung -- SparkR Documentation * Florian Verhein -- Improvements and new features in EC2 * Gaurav Nanda -- Documentation in Core * Glenn Weidner -- Documentation in MLlib and PySpark @@ -148,9 +155,11 @@ Thanks to The following organizations, w * GuoQiang Li -- New features in Core; bug fixes in Core and YARN * Haiyang Sea -- Improvements in SQL * 
Hangchen Yu -- Documentation in GraphX - * Hao Lin -- Improvements and new features in sparkr + * Hao Lin -- Improvements and new features in SparkR * Hari Shreedharan -- Test in Streaming and tests; new features in YARN; bug fixes in Web UI + * Harihar Nahak -- New features in SparkR * Holden Karau -- Improvements in Core, MLlib, and PySpark; bug fixes in PySpark + * Hossein Falaki -- SparkR Documentation * Hong Shen -- Bug fixes in Core and YARN * Hrishikesh Subramonian -- Improvements in MLlib and PySpark * Hung Lin -- Bug fixes in scheduler @@ -163,7 +172,7 @@ Thanks to The following organizations, w * Jaonary Rabarisoa -- Improvements in MLlib * Jayson Sunshine -- Documentation in Core * Jean Lyn -- Bug fixes in SQL - * Jeff Harrison -- Improvements in sparkr + * Jeff Harrison -- Improvements in SparkR * Jeremy A. Lucas -- Improvements in Streaming * Jeremy Freeman -- Bug fixes in Streaming and MLlib * Jim Carroll -- Bug fixes in MLlib @@ -199,6 +208,7 @@ Thanks to The following organizations, w * Masayoshi TSUZUKI -- Bug fixes in Windows and Core * Matei Zaharia -- Improvement in Web UI * Matt Aasted -- Bug fixes in EC2 + * Matt Massie -- New features in SparkR * Matt Wise -- Documentation in Core * Matthew Cheah
spark git commit: [SPARK-8310] [EC2] Updates the master branch EC2 versions
Repository: spark Updated Branches: refs/heads/master 1191c3efc -> c8d551d54

[SPARK-8310] [EC2] Updates the master branch EC2 versions

Will send another PR for `branch-1.4`

Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>

Closes #6764 from shivaram/SPARK-8310 and squashes the following commits:

d8cd3b3 [Shivaram Venkataraman] This updates the master branch EC2 versions

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8d551d5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8d551d5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8d551d5

Branch: refs/heads/master
Commit: c8d551d546979e126c91925487e30c353185e3ba
Parents: 1191c3e
Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Authored: Thu Jun 11 13:18:42 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Thu Jun 11 13:18:42 2015 -0700

--
 ec2/spark_ec2.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/c8d551d5/ec2/spark_ec2.py
--
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 84629cb..58b24ae 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -51,7 +51,7 @@ else:
     raw_input = input
     xrange = range

-SPARK_EC2_VERSION = "1.3.1"
+SPARK_EC2_VERSION = "1.4.0"
 SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__))

 VALID_SPARK_VERSIONS = set([
@@ -89,7 +89,7 @@ DEFAULT_SPARK_GITHUB_REPO = "https://github.com/apache/spark"

 # Default location to get the spark-ec2 scripts (and ami-list) from
 DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/mesos/spark-ec2"
-DEFAULT_SPARK_EC2_BRANCH = "branch-1.3"
+DEFAULT_SPARK_EC2_BRANCH = "branch-1.4"

 def setup_external_libs(libs):

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8350] [R] Log R unit test output to unit-tests.log
Repository: spark Updated Branches: refs/heads/master 4c5889e8f - 56d4e8a2d [SPARK-8350] [R] Log R unit test output to unit-tests.log Right now it's logged to R-unit-tests.log. Jenkins currently only archives files named unit-tests.log, and this is what all other modules (e.g. SQL, network, REPL) use. 1. We should be consistent 2. I don't want to reconfigure Jenkins to accept a different file shivaram Author: andrewor14 and...@databricks.com Author: Andrew Or and...@databricks.com Closes #6807 from andrewor14/r-logs and squashes the following commits: 96005d2 [andrewor14] Nest unit-tests.log further until R 407c46c [andrewor14] Add target to log path d7b68ae [Andrew Or] Log R unit test output to unit-tests.log Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/56d4e8a2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/56d4e8a2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/56d4e8a2 Branch: refs/heads/master Commit: 56d4e8a2d0f6aab9a599cd8733e20500ffe8fc8a Parents: 4c5889e Author: andrewor14 and...@databricks.com Authored: Mon Jun 15 08:16:22 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Mon Jun 15 08:16:22 2015 -0700 -- R/log4j.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/56d4e8a2/R/log4j.properties -- diff --git a/R/log4j.properties b/R/log4j.properties index 701adb2..cce8d91 100644 --- a/R/log4j.properties +++ b/R/log4j.properties @@ -19,7 +19,7 @@ log4j.rootCategory=INFO, file log4j.appender.file=org.apache.log4j.FileAppender log4j.appender.file.append=true -log4j.appender.file.file=R-unit-tests.log +log4j.appender.file.file=R/target/unit-tests.log log4j.appender.file.layout=org.apache.log4j.PatternLayout log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r1685612 - /spark/site/index.html
Author: shivaram Date: Mon Jun 15 16:05:17 2015 New Revision: 1685612 URL: http://svn.apache.org/r1685612 Log: Add html for R update Modified: spark/site/index.html Modified: spark/site/index.html URL: http://svn.apache.org/viewvc/spark/site/index.html?rev=1685612r1=1685611r2=1685612view=diff == --- spark/site/index.html (original) +++ spark/site/index.html Mon Jun 15 16:05:17 2015 @@ -200,13 +200,13 @@ h2Ease of Use/h2 p class=lead - Write applications quickly in Java, Scala or Python. + Write applications quickly in Java, Scala, Python, R. /p p Spark offers over 80 high-level operators that make it easy to build parallel apps. And you can use it eminteractively/em - from the Scala and Python shells. + from the Scala, Python and R shells. /p /div div class=col-md-5 col-sm-5 col-padded-top col-center - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r1685610 - /spark/index.md
Author: shivaram Date: Mon Jun 15 16:04:31 2015 New Revision: 1685610 URL: http://svn.apache.org/r1685610 Log: Add R to list of supported languages Modified: spark/index.md Modified: spark/index.md URL: http://svn.apache.org/viewvc/spark/index.md?rev=1685610r1=1685609r2=1685610view=diff == --- spark/index.md (original) +++ spark/index.md Mon Jun 15 16:04:31 2015 @@ -41,13 +41,13 @@ navigation: h2Ease of Use/h2 p class=lead - Write applications quickly in Java, Scala or Python. + Write applications quickly in Java, Scala, Python, R. /p p Spark offers over 80 high-level operators that make it easy to build parallel apps. And you can use it eminteractively/em - from the Scala and Python shells. + from the Scala, Python and R shells. /p /div div class=col-md-5 col-sm-5 col-padded-top col-center - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8322] [EC2] Added spark 1.4.0 into the VALID_SPARK_VERSIONS and…
Repository: spark Updated Branches: refs/heads/branch-1.4 8b25f62bf -> 141eab71e

[SPARK-8322] [EC2] Added spark 1.4.0 into the VALID_SPARK_VERSIONS and… … SPARK_TACHYON_MAP

Author: Mark Smith <mark.sm...@bronto.com>

Closes #6777 from markmsmith/branch-1.4 and squashes the following commits:

a218cfa [Mark Smith] [SPARK-8322][EC2] Fixed tachyon mapp entry to point to 0.6.4
90d1655 [Mark Smith] [SPARK-8322][EC2] Added spark 1.4.0 into the VALID_SPARK_VERSIONS and SPARK_TACHYON_MAP

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/141eab71
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/141eab71
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/141eab71

Branch: refs/heads/branch-1.4
Commit: 141eab71ee3aa05da899ecfc6bae40b3798a4665
Parents: 8b25f62
Author: Mark Smith <mark.sm...@bronto.com>
Authored: Fri Jun 12 10:28:30 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Fri Jun 12 10:28:30 2015 -0700

--
 ec2/spark_ec2.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/141eab71/ec2/spark_ec2.py
--
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index a765c20..5aa3e3d 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -70,7 +70,7 @@ VALID_SPARK_VERSIONS = set([
     "1.2.1",
     "1.3.0",
     "1.3.1",
-    "1.4.0"
+    "1.4.0",
 ])

 SPARK_TACHYON_MAP = {
@@ -83,6 +83,7 @@ SPARK_TACHYON_MAP = {
     "1.2.1": "0.5.0",
     "1.3.0": "0.5.0",
     "1.3.1": "0.5.0",
+    "1.4.0": "0.6.4",
 }

 DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-7227] [SPARKR] Support fillna / dropna in R DataFrame.
Repository: spark Updated Branches: refs/heads/branch-1.4 bab0fab68 -> f1d4e7e31

[SPARK-7227] [SPARKR] Support fillna / dropna in R DataFrame.

Author: Sun Rui <rui@intel.com>

Closes #6183 from sun-rui/SPARK-7227 and squashes the following commits:

dd6f5b3 [Sun Rui] Rename readEnv() back to readMap(). Add alias na.omit() for dropna().
41cf725 [Sun Rui] [SPARK-7227][SPARKR] Support fillna / dropna in R DataFrame.

(cherry picked from commit 46576ab303e50c54c3bd464f8939953efe644574)
Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f1d4e7e3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f1d4e7e3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f1d4e7e3

Branch: refs/heads/branch-1.4
Commit: f1d4e7e3111a6a44358d405389180d6cf6406223
Parents: bab0fab
Author: Sun Rui <rui@intel.com>
Authored: Sun May 31 15:01:21 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Sun May 31 15:02:16 2015 -0700

--
 R/pkg/NAMESPACE                                 |   2 +
 R/pkg/R/DataFrame.R                             | 125 +++
 R/pkg/R/generics.R                              |  18 +++
 R/pkg/R/serialize.R                             |  10 +-
 R/pkg/inst/tests/test_sparkSQL.R                | 109 
 .../scala/org/apache/spark/api/r/SerDe.scala    |   6 +-
 6 files changed, 267 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f1d4e7e3/R/pkg/NAMESPACE
--
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 411126a..f9447f6 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -19,9 +19,11 @@ exportMethods("arrange",
               "count",
               "describe",
               "distinct",
+              "dropna",
               "dtypes",
               "except",
               "explain",
+              "fillna",
               "filter",
               "first",
               "group_by",

http://git-wip-us.apache.org/repos/asf/spark/blob/f1d4e7e3/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index e79d324..0af5cb8 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1429,3 +1429,128 @@ setMethod("describe",
             sdf <- callJMethod(x@sdf, "describe", listToSeq(colList))
             dataFrame(sdf)
           })
+
+#' dropna
+#'
+#' Returns a new DataFrame omitting rows with null values.
+#'
+#' @param x A SparkSQL DataFrame.
+#' @param how "any" or "all".
+#'            if "any", drop a row if it contains any nulls.
+#'            if "all", drop a row only if all its values are null.
+#'            if minNonNulls is specified, how is ignored.
+#' @param minNonNulls If specified, drop rows that have less than
+#'                    minNonNulls non-null values.
+#'                    This overwrites the how parameter.
+#' @param cols Optional list of column names to consider.
+#' @return A DataFrame
+#'
+#' @rdname nafunctions
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlCtx <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- jsonFile(sqlCtx, path)
+#' dropna(df)
+#' }
+setMethod("dropna",
+          signature(x = "DataFrame"),
+          function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+            how <- match.arg(how)
+            if (is.null(cols)) {
+              cols <- columns(x)
+            }
+            if (is.null(minNonNulls)) {
+              minNonNulls <- if (how == "any") { length(cols) } else { 1 }
+            }
+
+            naFunctions <- callJMethod(x@sdf, "na")
+            sdf <- callJMethod(naFunctions, "drop",
+                               as.integer(minNonNulls), listToSeq(as.list(cols)))
+            dataFrame(sdf)
+          })
+
+#' @aliases dropna
+#' @export
+setMethod("na.omit",
+          signature(x = "DataFrame"),
+          function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+            dropna(x, how, minNonNulls, cols)
+          })
+
+#' fillna
+#'
+#' Replace null values.
+#'
+#' @param x A SparkSQL DataFrame.
+#' @param value Value to replace null values with.
+#'              Should be an integer, numeric, character or named list.
+#'              If the value is a named list, then cols is ignored and
+#'              value must be a mapping from column name (character) to
+#'              replacement value. The replacement value must be an
+#'              integer, numeric or character.
+#' @param cols optional list of column names to consider.
+#'             Columns specified in cols that do not have matching data
+#'             type are ignored. For example, if value is a character
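Based on the signatures documented in the diff above, a small usage sketch; the data frame, path, and column names ("age", "name") are made up for illustration.

library(SparkR)
sc <- sparkR.init()
sqlCtx <- sparkRSQL.init(sc)
df <- jsonFile(sqlCtx, "path/to/file.json")  # illustrative path

# Drop rows containing any null, or rows with fewer than 2 non-null values.
cleaned  <- dropna(df, how = "any")
cleaned2 <- dropna(df, minNonNulls = 2)

# Replace nulls with one value everywhere, or per column via a named list.
filled  <- fillna(df, 0)
filled2 <- fillna(df, list(age = 0, name = "unknown"))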
spark git commit: [SPARK-7227] [SPARKR] Support fillna / dropna in R DataFrame.
Repository: spark Updated Branches: refs/heads/master 866652c90 - 46576ab30 [SPARK-7227] [SPARKR] Support fillna / dropna in R DataFrame. Author: Sun Rui rui@intel.com Closes #6183 from sun-rui/SPARK-7227 and squashes the following commits: dd6f5b3 [Sun Rui] Rename readEnv() back to readMap(). Add alias na.omit() for dropna(). 41cf725 [Sun Rui] [SPARK-7227][SPARKR] Support fillna / dropna in R DataFrame. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/46576ab3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/46576ab3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/46576ab3 Branch: refs/heads/master Commit: 46576ab303e50c54c3bd464f8939953efe644574 Parents: 866652c Author: Sun Rui rui@intel.com Authored: Sun May 31 15:01:21 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sun May 31 15:01:59 2015 -0700 -- R/pkg/NAMESPACE | 2 + R/pkg/R/DataFrame.R | 125 +++ R/pkg/R/generics.R | 18 +++ R/pkg/R/serialize.R | 10 +- R/pkg/inst/tests/test_sparkSQL.R| 109 .../scala/org/apache/spark/api/r/SerDe.scala| 6 +- 6 files changed, 267 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/46576ab3/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 411126a..f9447f6 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -19,9 +19,11 @@ exportMethods(arrange, count, describe, distinct, + dropna, dtypes, except, explain, + fillna, filter, first, group_by, http://git-wip-us.apache.org/repos/asf/spark/blob/46576ab3/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index e79d324..0af5cb8 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1429,3 +1429,128 @@ setMethod(describe, sdf - callJMethod(x@sdf, describe, listToSeq(colList)) dataFrame(sdf) }) + +#' dropna +#' +#' Returns a new DataFrame omitting rows with null values. +#' +#' @param x A SparkSQL DataFrame. +#' @param how any or all. +#'if any, drop a row if it contains any nulls. +#'if all, drop a row only if all its values are null. +#'if minNonNulls is specified, how is ignored. +#' @param minNonNulls If specified, drop rows that have less than +#'minNonNulls non-null values. +#'This overwrites the how parameter. +#' @param cols Optional list of column names to consider. +#' @return A DataFrame +#' +#' @rdname nafunctions +#' @export +#' @examples +#'\dontrun{ +#' sc - sparkR.init() +#' sqlCtx - sparkRSQL.init(sc) +#' path - path/to/file.json +#' df - jsonFile(sqlCtx, path) +#' dropna(df) +#' } +setMethod(dropna, + signature(x = DataFrame), + function(x, how = c(any, all), minNonNulls = NULL, cols = NULL) { +how - match.arg(how) +if (is.null(cols)) { + cols - columns(x) +} +if (is.null(minNonNulls)) { + minNonNulls - if (how == any) { length(cols) } else { 1 } +} + +naFunctions - callJMethod(x@sdf, na) +sdf - callJMethod(naFunctions, drop, + as.integer(minNonNulls), listToSeq(as.list(cols))) +dataFrame(sdf) + }) + +#' @aliases dropna +#' @export +setMethod(na.omit, + signature(x = DataFrame), + function(x, how = c(any, all), minNonNulls = NULL, cols = NULL) { +dropna(x, how, minNonNulls, cols) + }) + +#' fillna +#' +#' Replace null values. +#' +#' @param x A SparkSQL DataFrame. +#' @param value Value to replace null values with. +#' Should be an integer, numeric, character or named list. +#' If the value is a named list, then cols is ignored and +#' value must be a mapping from column name (character) to +#' replacement value. 
The replacement value must be an +#' integer, numeric or character. +#' @param cols optional list of column names to consider. +#' Columns specified in cols that do not have matching data +#' type are ignored. For example, if value is a character, and +#' subset contains a non-character column, then the non-character +#' column is simply ignored. +#' @return A DataFrame
spark git commit: [SPARK-8028] [SPARKR] Use addJar instead of setJars in SparkR
Repository: spark Updated Branches: refs/heads/branch-1.4 f5a9833f3 -> cbfb682ab

[SPARK-8028] [SPARKR] Use addJar instead of setJars in SparkR

This prevents the spark.jars from being cleared while using `--packages` or `--jars`

cc pwendell davies brkyvz

Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>

Closes #6568 from shivaram/SPARK-8028 and squashes the following commits:

3a9cf1f [Shivaram Venkataraman] Use addJar instead of setJars in SparkR This prevents the spark.jars from being cleared

(cherry picked from commit 6b44278ef7cd2a278dfa67e8393ef30775c72726)
Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cbfb682a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cbfb682a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cbfb682a

Branch: refs/heads/branch-1.4
Commit: cbfb682ab90d259ca716ef6987b4ca367b79eda3
Parents: f5a9833
Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Authored: Mon Jun 1 21:01:14 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Mon Jun 1 21:01:26 2015 -0700

--
 core/src/main/scala/org/apache/spark/api/r/RRDD.scala | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/cbfb682a/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
--
diff --git a/core/src/main/scala/org/apache/spark/api/r/RRDD.scala b/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
index e020458..4dfa732 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
@@ -355,7 +355,6 @@ private[r] object RRDD {

     val sparkConf = new SparkConf().setAppName(appName)
                                    .setSparkHome(sparkHome)
-                                   .setJars(jars)

     // Override `master` if we have a user-specified value
     if (master != "") {
@@ -373,7 +372,11 @@ private[r] object RRDD {
       sparkConf.setExecutorEnv(name.asInstanceOf[String], value.asInstanceOf[String])
     }

-    new JavaSparkContext(sparkConf)
+    val jsc = new JavaSparkContext(sparkConf)
+    jars.foreach { jar =>
+      jsc.addJar(jar)
+    }
+    jsc
   }

   /**

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8028] [SPARKR] Use addJar instead of setJars in SparkR
Repository: spark Updated Branches: refs/heads/master 15d7c90ae -> 6b44278ef

[SPARK-8028] [SPARKR] Use addJar instead of setJars in SparkR

This prevents the spark.jars from being cleared while using `--packages` or `--jars`

cc pwendell davies brkyvz

Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>

Closes #6568 from shivaram/SPARK-8028 and squashes the following commits:

3a9cf1f [Shivaram Venkataraman] Use addJar instead of setJars in SparkR This prevents the spark.jars from being cleared

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6b44278e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6b44278e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6b44278e

Branch: refs/heads/master
Commit: 6b44278ef7cd2a278dfa67e8393ef30775c72726
Parents: 15d7c90
Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Authored: Mon Jun 1 21:01:14 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Mon Jun 1 21:01:14 2015 -0700

--
 core/src/main/scala/org/apache/spark/api/r/RRDD.scala | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/6b44278e/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
--
diff --git a/core/src/main/scala/org/apache/spark/api/r/RRDD.scala b/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
index e020458..4dfa732 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/RRDD.scala
@@ -355,7 +355,6 @@ private[r] object RRDD {

     val sparkConf = new SparkConf().setAppName(appName)
                                    .setSparkHome(sparkHome)
-                                   .setJars(jars)

     // Override `master` if we have a user-specified value
     if (master != "") {
@@ -373,7 +372,11 @@ private[r] object RRDD {
       sparkConf.setExecutorEnv(name.asInstanceOf[String], value.asInstanceOf[String])
     }

-    new JavaSparkContext(sparkConf)
+    val jsc = new JavaSparkContext(sparkConf)
+    jars.foreach { jar =>
+      jsc.addJar(jar)
+    }
+    jsc
   }

   /**

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
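The user-visible consequence is easiest to see from R: before this change, populating spark.jars via setJars could be clobbered when --packages or --jars also contributed jars, whereas addJar is additive. A hedged sketch (jar path and package coordinates are illustrative):

library(SparkR)

# Both sources of jars now reach the JVM together, because RRDD registers each
# jar with jsc.addJar() instead of overwriting spark.jars with setJars().
sc <- sparkR.init(sparkJars = "path/to/my-extra.jar",                      # illustrative
                  sparkPackages = "com.databricks:spark-csv_2.10:1.0.3")  # illustrative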
spark git commit: [SPARK-8027] [SPARKR] Add maven profile to build R package docs
Repository: spark Updated Branches: refs/heads/master 89f642a0e -> cae9306c4

[SPARK-8027] [SPARKR] Add maven profile to build R package docs

Also use that profile in create-release.sh

cc pwendell -- Note that this means that we need `knitr` and `roxygen` installed on the machines used for building the release. Let me know if you need help with that.

Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>

Closes #6567 from shivaram/SPARK-8027 and squashes the following commits:

8dc8ecf [Shivaram Venkataraman] Add maven profile to build R package docs Also use that profile in create-release.sh

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cae9306c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cae9306c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cae9306c

Branch: refs/heads/master
Commit: cae9306c4f437c722baa57593fe83f4b7d82dbff
Parents: 89f642a
Author: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Authored: Mon Jun 1 21:21:45 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Mon Jun 1 21:21:45 2015 -0700

--
 core/pom.xml                         | 23 +++
 dev/create-release/create-release.sh | 16 
 2 files changed, 31 insertions(+), 8 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/cae9306c/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index 5c02be8..a021842 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -481,6 +481,29 @@
         </plugins>
       </build>
     </profile>
+    <profile>
+      <id>sparkr-docs</id>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>exec-maven-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>sparkr-pkg-docs</id>
+                <phase>compile</phase>
+                <goals>
+                  <goal>exec</goal>
+                </goals>
+              </execution>
+            </executions>
+            <configuration>
+              <executable>..${path.separator}R${path.separator}create-docs${script.extension}</executable>
+            </configuration>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
   </profiles>
 </project>

http://git-wip-us.apache.org/repos/asf/spark/blob/cae9306c/dev/create-release/create-release.sh
--
diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh
index 54274a8..0b14a61 100755
--- a/dev/create-release/create-release.sh
+++ b/dev/create-release/create-release.sh
@@ -228,14 +228,14 @@ if [[ ! "$@" =~ --skip-package ]]; then

   # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds
   # share the same Zinc server.
-  make_binary_release "hadoop1" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver" "3030" &
-  make_binary_release "hadoop1-scala2.11" "-Psparkr -Phadoop-1 -Phive -Dscala-2.11" "3031" &
-  make_binary_release "cdh4" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
-  make_binary_release "hadoop2.3" "-Psparkr -Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" &
-  make_binary_release "hadoop2.4" "-Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" &
-  make_binary_release "mapr3" "-Pmapr3 -Psparkr -Phive -Phive-thriftserver" "3035" &
-  make_binary_release "mapr4" "-Pmapr4 -Psparkr -Pyarn -Phive -Phive-thriftserver" "3036" &
-  make_binary_release "hadoop2.4-without-hive" "-Psparkr -Phadoop-2.4 -Pyarn" "3037" &
+  make_binary_release "hadoop1" "-Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive-thriftserver" "3030" &
+  make_binary_release "hadoop1-scala2.11" "-Psparkr -Psparkr-docs -Phadoop-1 -Phive -Dscala-2.11" "3031" &
+  make_binary_release "cdh4" "-Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
+  make_binary_release "hadoop2.3" "-Psparkr -Psparkr-docs -Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" &
+  make_binary_release "hadoop2.4" "-Psparkr -Psparkr-docs -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" &
+  make_binary_release "mapr3" "-Pmapr3 -Psparkr -Psparkr-docs -Phive -Phive-thriftserver" "3035" &
+  make_binary_release "mapr4" "-Pmapr4 -Psparkr -Psparkr-docs -Pyarn -Phive -Phive-thriftserver" "3036" &
+  make_binary_release "hadoop2.4-without-hive" "-Psparkr -Psparkr-docs -Phadoop-2.4 -Pyarn" "3037" &
   wait
   rm -rf spark-$RELEASE_VERSION-bin-*/

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8027] [SPARKR] Add maven profile to build R package docs
Repository: spark Updated Branches: refs/heads/branch-1.4 8ac23762e - d542a35ad [SPARK-8027] [SPARKR] Add maven profile to build R package docs Also use that profile in create-release.sh cc pwendell -- Note that this means that we need `knitr` and `roxygen` installed on the machines used for building the release. Let me know if you need help with that. Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6567 from shivaram/SPARK-8027 and squashes the following commits: 8dc8ecf [Shivaram Venkataraman] Add maven profile to build R package docs Also use that profile in create-release.sh (cherry picked from commit cae9306c4f437c722baa57593fe83f4b7d82dbff) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d542a35a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d542a35a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d542a35a Branch: refs/heads/branch-1.4 Commit: d542a35ad74a9e530d0160adf9d10ff7c3075d0d Parents: 8ac2376 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Mon Jun 1 21:21:45 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Mon Jun 1 21:21:55 2015 -0700 -- core/pom.xml | 23 +++ dev/create-release/create-release.sh | 16 2 files changed, 31 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d542a35a/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index e58efe4..1f903fc 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -475,6 +475,29 @@ /plugins /build /profile +profile + idsparkr-docs/id + build +plugins + plugin +groupIdorg.codehaus.mojo/groupId +artifactIdexec-maven-plugin/artifactId +executions + execution +idsparkr-pkg-docs/id +phasecompile/phase +goals + goalexec/goal +/goals + /execution +/executions +configuration + executable..${path.separator}R${path.separator}create-docs${script.extension}/executable +/configuration + /plugin +/plugins + /build +/profile /profiles /project http://git-wip-us.apache.org/repos/asf/spark/blob/d542a35a/dev/create-release/create-release.sh -- diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh index 54274a8..0b14a61 100755 --- a/dev/create-release/create-release.sh +++ b/dev/create-release/create-release.sh @@ -228,14 +228,14 @@ if [[ ! $@ =~ --skip-package ]]; then # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds # share the same Zinc server. 
- make_binary_release hadoop1 -Psparkr -Phadoop-1 -Phive -Phive-thriftserver 3030 - make_binary_release hadoop1-scala2.11 -Psparkr -Phadoop-1 -Phive -Dscala-2.11 3031 - make_binary_release cdh4 -Psparkr -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0 3032 - make_binary_release hadoop2.3 -Psparkr -Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn 3033 - make_binary_release hadoop2.4 -Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn 3034 - make_binary_release mapr3 -Pmapr3 -Psparkr -Phive -Phive-thriftserver 3035 - make_binary_release mapr4 -Pmapr4 -Psparkr -Pyarn -Phive -Phive-thriftserver 3036 - make_binary_release hadoop2.4-without-hive -Psparkr -Phadoop-2.4 -Pyarn 3037 + make_binary_release hadoop1 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive-thriftserver 3030 + make_binary_release hadoop1-scala2.11 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Dscala-2.11 3031 + make_binary_release cdh4 -Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0 3032 + make_binary_release hadoop2.3 -Psparkr -Psparkr-docs -Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn 3033 + make_binary_release hadoop2.4 -Psparkr -Psparkr-docs -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn 3034 + make_binary_release mapr3 -Pmapr3 -Psparkr -Psparkr-docs -Phive -Phive-thriftserver 3035 + make_binary_release mapr4 -Pmapr4 -Psparkr -Psparkr-docs -Pyarn -Phive -Phive-thriftserver 3036 + make_binary_release hadoop2.4-without-hive -Psparkr -Psparkr-docs -Phadoop-2.4 -Pyarn 3037 wait rm -rf spark-$RELEASE_VERSION-bin-*/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h
spark git commit: [SPARK-7714] [SPARKR] SparkR tests should use more specific expectations than expect_true
Repository: spark Updated Branches: refs/heads/master fdcad6ef4 -> 69c5dee2f

[SPARK-7714] [SPARKR] SparkR tests should use more specific expectations than expect_true

1. Update the pattern 'expect_true(a == b)' to 'expect_equal(a, b)'.
2. Update the pattern 'expect_true(inherits(a, b))' to 'expect_is(a, b)'.
3. Update the pattern 'expect_true(identical(a, b))' to 'expect_identical(a, b)'.

Author: Sun Rui <rui@intel.com>

Closes #7152 from sun-rui/SPARK-7714 and squashes the following commits:

8ad2440 [Sun Rui] Fix test case errors.
8fe9f0c [Sun Rui] Update the pattern 'expect_true(identical(a, b))' to 'expect_identical(a, b)'.
f1b8005 [Sun Rui] Update the pattern 'expect_true(inherits(a, b))' to 'expect_is(a, b)'.
f631e94 [Sun Rui] Update the pattern 'expect_true(a == b)' to 'expect_equal(a, b)'.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/69c5dee2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/69c5dee2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/69c5dee2

Branch: refs/heads/master
Commit: 69c5dee2f01b1ae35bd813d31d46429a32cb475d
Parents: fdcad6e
Author: Sun Rui <rui@intel.com>
Authored: Wed Jul 1 09:50:12 2015 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Wed Jul 1 09:50:12 2015 -0700

--
 R/pkg/inst/tests/test_binaryFile.R          |   2 +-
 R/pkg/inst/tests/test_binary_function.R     |   4 +-
 R/pkg/inst/tests/test_includeJAR.R          |   4 +-
 R/pkg/inst/tests/test_parallelize_collect.R |   2 +-
 R/pkg/inst/tests/test_rdd.R                 |   4 +-
 R/pkg/inst/tests/test_sparkSQL.R            | 354 +++
 R/pkg/inst/tests/test_take.R                |   8 +-
 R/pkg/inst/tests/test_textFile.R            |   6 +-
 R/pkg/inst/tests/test_utils.R               |   4 +-
 9 files changed, 194 insertions(+), 194 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/69c5dee2/R/pkg/inst/tests/test_binaryFile.R
--
diff --git a/R/pkg/inst/tests/test_binaryFile.R b/R/pkg/inst/tests/test_binaryFile.R
index 4db7266..ccaea18 100644
--- a/R/pkg/inst/tests/test_binaryFile.R
+++ b/R/pkg/inst/tests/test_binaryFile.R
@@ -82,7 +82,7 @@ test_that("saveAsObjectFile()/objectFile() works with multiple paths", {
   saveAsObjectFile(rdd2, fileName2)

   rdd <- objectFile(sc, c(fileName1, fileName2))
-  expect_true(count(rdd) == 2)
+  expect_equal(count(rdd), 2)

   unlink(fileName1, recursive = TRUE)
   unlink(fileName2, recursive = TRUE)

http://git-wip-us.apache.org/repos/asf/spark/blob/69c5dee2/R/pkg/inst/tests/test_binary_function.R
--
diff --git a/R/pkg/inst/tests/test_binary_function.R b/R/pkg/inst/tests/test_binary_function.R
index a1e354e..3be8c65 100644
--- a/R/pkg/inst/tests/test_binary_function.R
+++ b/R/pkg/inst/tests/test_binary_function.R
@@ -38,13 +38,13 @@ test_that("union on two RDDs", {
   union.rdd <- unionRDD(rdd, text.rdd)
   actual <- collect(union.rdd)
   expect_equal(actual, c(as.list(nums), mockFile))
-  expect_true(getSerializedMode(union.rdd) == "byte")
+  expect_equal(getSerializedMode(union.rdd), "byte")

   rdd <- map(text.rdd, function(x) {x})
   union.rdd <- unionRDD(rdd, text.rdd)
   actual <- collect(union.rdd)
   expect_equal(actual, as.list(c(mockFile, mockFile)))
-  expect_true(getSerializedMode(union.rdd) == "byte")
+  expect_equal(getSerializedMode(union.rdd), "byte")

   unlink(fileName)
 })

http://git-wip-us.apache.org/repos/asf/spark/blob/69c5dee2/R/pkg/inst/tests/test_includeJAR.R
--
diff --git a/R/pkg/inst/tests/test_includeJAR.R b/R/pkg/inst/tests/test_includeJAR.R
index 8bc693b..844d86f 100644
--- a/R/pkg/inst/tests/test_includeJAR.R
+++ b/R/pkg/inst/tests/test_includeJAR.R
@@ -31,7 +31,7 @@
 test_that("sparkJars tag in SparkContext", {
   testOutput <- runScript()
   helloTest <- testOutput[1]
-  expect_true(helloTest == "Hello, Dave")
+  expect_equal(helloTest, "Hello, Dave")
   basicFunction <- testOutput[2]
-  expect_true(basicFunction == 4L)
+  expect_equal(basicFunction, 4)
 })

http://git-wip-us.apache.org/repos/asf/spark/blob/69c5dee2/R/pkg/inst/tests/test_parallelize_collect.R
--
diff --git a/R/pkg/inst/tests/test_parallelize_collect.R b/R/pkg/inst/tests/test_parallelize_collect.R
index fff0286..2552127 100644
--- a/R/pkg/inst/tests/test_parallelize_collect.R
+++ b/R/pkg/inst/tests/test_parallelize_collect.R
@@ -57,7 +57,7 @@ test_that("parallelize() on simple vectors and lists returns an RDD", {
                strListRDD2)

   for (rdd in rdds) {
-    expect_true(inherits(rdd, "RDD
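The point of the rewrite is testthat's failure reporting: expect_true can only say the condition "isn't true", while the more specific expectations print the actual and expected values. A minimal standalone sketch (plain testthat, no Spark required):

library(testthat)

x <- 4L

expect_equal(x, 4)         # replaces expect_true(x == 4); reports actual vs expected on failure
expect_is(x, "integer")    # replaces expect_true(inherits(x, "integer"))
expect_identical(x, 4L)    # replaces expect_true(identical(x, 4L))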
spark git commit: [SPARK-8596] [EC2] Added port for Rstudio
Repository: spark Updated Branches: refs/heads/master ec7843819 - 9ce78b434 [SPARK-8596] [EC2] Added port for Rstudio This would otherwise need to be set manually by R users in AWS. https://issues.apache.org/jira/browse/SPARK-8596 Author: Vincent D. Warmerdam vincentwarmer...@gmail.com Author: vincent vincentwarmer...@gmail.com Closes #7068 from koaning/rstudio-port-number and squashes the following commits: ac8100d [vincent] Update spark_ec2.py ce6ad88 [Vincent D. Warmerdam] added port number for rstudio Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9ce78b43 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9ce78b43 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9ce78b43 Branch: refs/heads/master Commit: 9ce78b4343febe87c4edd650c698cc20d38f615d Parents: ec78438 Author: Vincent D. Warmerdam vincentwarmer...@gmail.com Authored: Sun Jun 28 13:33:33 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sun Jun 28 13:33:33 2015 -0700 -- ec2/spark_ec2.py | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9ce78b43/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index e4932cf..18ccbc0 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -505,6 +505,8 @@ def launch_cluster(conn, opts, cluster_name): master_group.authorize('tcp', 50070, 50070, authorized_address) master_group.authorize('tcp', 60070, 60070, authorized_address) master_group.authorize('tcp', 4040, 4045, authorized_address) +# Rstudio (GUI for R) needs port 8787 for web access +master_group.authorize('tcp', 8787, 8787, authorized_address) # HDFS NFS gateway requires 111,2049,4242 for tcp udp master_group.authorize('tcp', 111, 111, authorized_address) master_group.authorize('udp', 111, 111, authorized_address) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8549] [SPARKR] Fix the line length of SparkR
Repository: spark Updated Branches: refs/heads/master f9c448dce - a0cb111b2 [SPARK-8549] [SPARKR] Fix the line length of SparkR [[SPARK-8549] Fix the line length of SparkR - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-8549) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #7204 from yu-iskw/SPARK-8549 and squashes the following commits: 6fb131a [Yu ISHIKAWA] Fix the typo 1737598 [Yu ISHIKAWA] [SPARK-8549][SparkR] Fix the line length of SparkR Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a0cb111b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a0cb111b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a0cb111b Branch: refs/heads/master Commit: a0cb111b22cb093e86b0daeecb3dcc41d095df40 Parents: f9c448d Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Sun Jul 5 20:50:02 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sun Jul 5 20:50:02 2015 -0700 -- R/pkg/R/generics.R | 3 ++- R/pkg/R/pairRDD.R | 12 ++-- R/pkg/R/sparkR.R | 9 ++--- R/pkg/R/utils.R| 31 ++- R/pkg/inst/tests/test_includeJAR.R | 4 ++-- R/pkg/inst/tests/test_rdd.R| 12 R/pkg/inst/tests/test_sparkSQL.R | 11 +-- 7 files changed, 51 insertions(+), 31 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a0cb111b/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 79055b7..fad9d71 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -20,7 +20,8 @@ # @rdname aggregateRDD # @seealso reduce # @export -setGeneric(aggregateRDD, function(x, zeroValue, seqOp, combOp) { standardGeneric(aggregateRDD) }) +setGeneric(aggregateRDD, + function(x, zeroValue, seqOp, combOp) { standardGeneric(aggregateRDD) }) # @rdname cache-methods # @export http://git-wip-us.apache.org/repos/asf/spark/blob/a0cb111b/R/pkg/R/pairRDD.R -- diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R index 7f902ba..0f1179e 100644 --- a/R/pkg/R/pairRDD.R +++ b/R/pkg/R/pairRDD.R @@ -560,8 +560,8 @@ setMethod(join, # Left outer join two RDDs # # @description -# \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of the form list(K, V). -# The key types of the two RDDs should be the same. +# \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of +# the form list(K, V). The key types of the two RDDs should be the same. # # @param x An RDD to be joined. Should be an RDD where each element is # list(K, V). @@ -597,8 +597,8 @@ setMethod(leftOuterJoin, # Right outer join two RDDs # # @description -# \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of the form list(K, V). -# The key types of the two RDDs should be the same. +# \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of +# the form list(K, V). The key types of the two RDDs should be the same. # # @param x An RDD to be joined. Should be an RDD where each element is # list(K, V). @@ -634,8 +634,8 @@ setMethod(rightOuterJoin, # Full outer join two RDDs # # @description -# \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of the form list(K, V). -# The key types of the two RDDs should be the same. +# \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of +# the form list(K, V). The key types of the two RDDs should be the same. # # @param x An RDD to be joined. Should be an RDD where each element is # list(K, V). 
http://git-wip-us.apache.org/repos/asf/spark/blob/a0cb111b/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 86233e0..048eb8e 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -105,7 +105,8 @@ sparkR.init <- function( sparkPackages = "") { if (exists(".sparkRjsc", envir = .sparkREnv)) { -cat("Re-using existing Spark Context. Please stop SparkR with sparkR.stop() or restart R to create a new Spark Context\n") +cat(paste("Re-using existing Spark Context.", + "Please stop SparkR with sparkR.stop() or restart R to create a new Spark Context\n")) return(get(".sparkRjsc", envir = .sparkREnv)) } @@ -180,14 +181,16 @@ sparkR.init <- function( sparkExecutorEnvMap <- new.env() if (!any(names(sparkExecutorEnv) == "LD_LIBRARY_PATH")) { -sparkExecutorEnvMap[["LD_LIBRARY_PATH"]] <- paste0("$LD_LIBRARY_PATH:", Sys.getenv("LD_LIBRARY_PATH
spark git commit: [HOTFIX] Copy SparkR lib if it exists in make-distribution
Repository: spark Updated Branches: refs/heads/branch-1.4 8d6d8a538 - fbc4480d9 [HOTFIX] Copy SparkR lib if it exists in make-distribution This is to fix an issue reported in #6373 where the `cp` would fail if `-Psparkr` was not used in the build cc dragos pwendell Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6379 from shivaram/make-distribution-hotfix and squashes the following commits: 08eb7e4 [Shivaram Venkataraman] Copy SparkR lib if it exists in make-distribution (cherry picked from commit b231baa24857ea83c8062dd4e033db4e35bf457d) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fbc4480d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fbc4480d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fbc4480d Branch: refs/heads/branch-1.4 Commit: fbc4480d9359a10609b79d429a15a244eff5f65f Parents: 8d6d8a5 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Sat May 23 12:28:16 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sat May 23 12:28:24 2015 -0700 -- make-distribution.sh | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fbc4480d/make-distribution.sh -- diff --git a/make-distribution.sh b/make-distribution.sh index 7882734..a2b0c43 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -229,10 +229,13 @@ cp $SPARK_HOME/conf/*.template $DISTDIR/conf cp $SPARK_HOME/README.md $DISTDIR cp -r $SPARK_HOME/bin $DISTDIR cp -r $SPARK_HOME/python $DISTDIR -mkdir -p $DISTDIR/R/lib -cp -r $SPARK_HOME/R/lib/SparkR $DISTDIR/R/lib cp -r $SPARK_HOME/sbin $DISTDIR cp -r $SPARK_HOME/ec2 $DISTDIR +# Copy SparkR if it exists +if [ -d $SPARK_HOME/R/lib/SparkR ]; then + mkdir -p $DISTDIR/R/lib + cp -r $SPARK_HOME/R/lib/SparkR $DISTDIR/R/lib +fi # Download and copy in tachyon, if requested if [ $SPARK_TACHYON == true ]; then - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [HOTFIX] Copy SparkR lib if it exists in make-distribution
Repository: spark Updated Branches: refs/heads/master 2b7e63585 - b231baa24 [HOTFIX] Copy SparkR lib if it exists in make-distribution This is to fix an issue reported in #6373 where the `cp` would fail if `-Psparkr` was not used in the build cc dragos pwendell Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6379 from shivaram/make-distribution-hotfix and squashes the following commits: 08eb7e4 [Shivaram Venkataraman] Copy SparkR lib if it exists in make-distribution Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b231baa2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b231baa2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b231baa2 Branch: refs/heads/master Commit: b231baa24857ea83c8062dd4e033db4e35bf457d Parents: 2b7e635 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Sat May 23 12:28:16 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sat May 23 12:28:16 2015 -0700 -- make-distribution.sh | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b231baa2/make-distribution.sh -- diff --git a/make-distribution.sh b/make-distribution.sh index 7882734..a2b0c43 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -229,10 +229,13 @@ cp $SPARK_HOME/conf/*.template $DISTDIR/conf cp $SPARK_HOME/README.md $DISTDIR cp -r $SPARK_HOME/bin $DISTDIR cp -r $SPARK_HOME/python $DISTDIR -mkdir -p $DISTDIR/R/lib -cp -r $SPARK_HOME/R/lib/SparkR $DISTDIR/R/lib cp -r $SPARK_HOME/sbin $DISTDIR cp -r $SPARK_HOME/ec2 $DISTDIR +# Copy SparkR if it exists +if [ -d $SPARK_HOME/R/lib/SparkR ]; then + mkdir -p $DISTDIR/R/lib + cp -r $SPARK_HOME/R/lib/SparkR $DISTDIR/R/lib +fi # Download and copy in tachyon, if requested if [ $SPARK_TACHYON == true ]; then - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-6811] Copy SparkR lib in make-distribution.sh
Repository: spark Updated Branches: refs/heads/master 7af3818c6 -> a40bca011 [SPARK-6811] Copy SparkR lib in make-distribution.sh This change also removes native libraries from SparkR to make sure our distribution works across platforms Tested by building on Mac, running on Amazon Linux (CentOS), Windows VM and vice-versa (built on Linux, run on Mac) I will also test this with YARN soon and update this PR. Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6373 from shivaram/sparkr-binary and squashes the following commits: ae41b5c [Shivaram Venkataraman] Remove native libraries from SparkR Also include the built SparkR package in make-distribution.sh Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a40bca01 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a40bca01 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a40bca01 Branch: refs/heads/master Commit: a40bca0111de45763c3ef4270afb2185c16b8f95 Parents: 7af3818 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Sat May 23 00:04:01 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sat May 23 00:04:01 2015 -0700 -- R/pkg/NAMESPACE | 5 +++- R/pkg/R/utils.R | 38 - R/pkg/src-native/Makefile | 27 ++ R/pkg/src-native/Makefile.win | 27 ++ R/pkg/src-native/string_hash_code.c | 49 R/pkg/src/Makefile | 27 -- R/pkg/src/Makefile.win | 27 -- R/pkg/src/string_hash_code.c| 49 make-distribution.sh| 2 ++ 9 files changed, 146 insertions(+), 105 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 64ffdcf..411126a 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -1,6 +1,9 @@ # Imports from base R importFrom(methods, setGeneric, setMethod, setOldClass) -useDynLib(SparkR, stringHashCode) + +# Disable native libraries till we figure out how to package it +# See SPARKR-7839 +#useDynLib(SparkR, stringHashCode) # S3 methods exported export(sparkR.init) http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 0e7b7bd..69b2700 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -122,13 +122,49 @@ hashCode <- function(key) { intBits <- packBits(rawToBits(rawVec), "integer") as.integer(bitwXor(intBits[2], intBits[1])) } else if (class(key) == "character") { -.Call("stringHashCode", key) +# TODO: SPARK-7839 means we might not have the native library available +if (is.loaded("stringHashCode")) { + .Call("stringHashCode", key) +} else { + n <- nchar(key) + if (n == 0) { +0L + } else { +asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) }) +hashC <- 0 +for (k in 1:length(asciiVals)) { + hashC <- mult31AndAdd(hashC, asciiVals[k]) +} +as.integer(hashC) + } +} } else { warning(paste("Could not hash object, returning 0", sep = "")) as.integer(0) } } +# Helper function used to wrap a 'numeric' value to integer bounds. +# Useful for implementing C-like integer arithmetic +wrapInt <- function(value) { + if (value > .Machine$integer.max) { +value <- value - 2 * .Machine$integer.max - 2 + } else if (value < -1 * .Machine$integer.max) { +value <- 2 * .Machine$integer.max + value + 2 + } + value +} + +# Multiply `val` by 31 and add `addVal` to the result. Ensures that +# integer-overflows are handled at every step.
+mult31AndAdd <- function(val, addVal) { + vec <- c(bitwShiftL(val, c(4,3,2,1,0)), addVal) + Reduce(function(a, b) { + wrapInt(as.numeric(a) + as.numeric(b)) + }, + vec) +} + # Create a new RDD with serializedMode == "byte". # Return itself if already in "byte" format. serializeToBytes <- function(rdd) { http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src-native/Makefile -- diff --git a/R/pkg/src-native/Makefile b/R/pkg/src-native/Makefile new file mode 100644 index 000..a55a56f --- /dev/null +++ b/R/pkg/src-native/Makefile @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses
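The pure-R fallback above reproduces the Java-style string hash (h = 31 * h + ascii(c)) with explicit 32-bit overflow wrapping, since R's doubles would otherwise silently exceed integer bounds. A minimal standalone sketch of the same scheme; `javaStringHash` is an illustrative name, not part of the patch:

```r
# Wrap a 'numeric' back into signed 32-bit integer range, as in the patch.
wrapInt <- function(value) {
  if (value > .Machine$integer.max) {
    value <- value - 2 * .Machine$integer.max - 2
  } else if (value < -1 * .Machine$integer.max) {
    value <- 2 * .Machine$integer.max + value + 2
  }
  value
}

javaStringHash <- function(key) {
  # charToRaw gives hex bytes; strtoi(x, 16L) turns each into its ascii code
  asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) })
  hashC <- 0
  for (k in seq_along(asciiVals)) {
    # 31 * h is built as (16 + 8 + 4 + 2 + 1) * h via shifts, wrapping each sum
    hashC <- Reduce(function(a, b) wrapInt(as.numeric(a) + as.numeric(b)),
                    c(bitwShiftL(hashC, c(4, 3, 2, 1, 0)), asciiVals[k]))
  }
  as.integer(hashC)
}

javaStringHash("abc")  # 96354, the same value java.lang.String#hashCode gives for "abc"
```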
spark git commit: [SPARK-6811] Copy SparkR lib in make-distribution.sh
Repository: spark Updated Branches: refs/heads/branch-1.4 c636b87dc -> c8eb76ba6 [SPARK-6811] Copy SparkR lib in make-distribution.sh This change also removes native libraries from SparkR to make sure our distribution works across platforms Tested by building on Mac, running on Amazon Linux (CentOS), Windows VM and vice-versa (built on Linux, run on Mac) I will also test this with YARN soon and update this PR. Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6373 from shivaram/sparkr-binary and squashes the following commits: ae41b5c [Shivaram Venkataraman] Remove native libraries from SparkR Also include the built SparkR package in make-distribution.sh (cherry picked from commit a40bca0111de45763c3ef4270afb2185c16b8f95) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8eb76ba Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8eb76ba Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8eb76ba Branch: refs/heads/branch-1.4 Commit: c8eb76ba673026f2fb2b22e8b3e8102a5940297c Parents: c636b87 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Sat May 23 00:04:01 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sat May 23 00:04:32 2015 -0700 -- R/pkg/NAMESPACE | 5 +++- R/pkg/R/utils.R | 38 - R/pkg/src-native/Makefile | 27 ++ R/pkg/src-native/Makefile.win | 27 ++ R/pkg/src-native/string_hash_code.c | 49 R/pkg/src/Makefile | 27 -- R/pkg/src/Makefile.win | 27 -- R/pkg/src/string_hash_code.c| 49 make-distribution.sh| 2 ++ 9 files changed, 146 insertions(+), 105 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c8eb76ba/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 64ffdcf..411126a 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -1,6 +1,9 @@ # Imports from base R importFrom(methods, setGeneric, setMethod, setOldClass) -useDynLib(SparkR, stringHashCode) + +# Disable native libraries till we figure out how to package it +# See SPARKR-7839 +#useDynLib(SparkR, stringHashCode) # S3 methods exported export(sparkR.init) http://git-wip-us.apache.org/repos/asf/spark/blob/c8eb76ba/R/pkg/R/utils.R -- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 0e7b7bd..69b2700 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -122,13 +122,49 @@ hashCode <- function(key) { intBits <- packBits(rawToBits(rawVec), "integer") as.integer(bitwXor(intBits[2], intBits[1])) } else if (class(key) == "character") { -.Call("stringHashCode", key) +# TODO: SPARK-7839 means we might not have the native library available +if (is.loaded("stringHashCode")) { + .Call("stringHashCode", key) +} else { + n <- nchar(key) + if (n == 0) { +0L + } else { +asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) }) +hashC <- 0 +for (k in 1:length(asciiVals)) { + hashC <- mult31AndAdd(hashC, asciiVals[k]) +} +as.integer(hashC) + } +} } else { warning(paste("Could not hash object, returning 0", sep = "")) as.integer(0) } } +# Helper function used to wrap a 'numeric' value to integer bounds. +# Useful for implementing C-like integer arithmetic +wrapInt <- function(value) { + if (value > .Machine$integer.max) { +value <- value - 2 * .Machine$integer.max - 2 + } else if (value < -1 * .Machine$integer.max) { +value <- 2 * .Machine$integer.max + value + 2 + } + value +} + +# Multiply `val` by 31 and add `addVal` to the result. Ensures that +# integer-overflows are handled at every step.
+mult31AndAdd <- function(val, addVal) { + vec <- c(bitwShiftL(val, c(4,3,2,1,0)), addVal) + Reduce(function(a, b) { + wrapInt(as.numeric(a) + as.numeric(b)) + }, + vec) +} + # Create a new RDD with serializedMode == "byte". # Return itself if already in "byte" format. serializeToBytes <- function(rdd) { http://git-wip-us.apache.org/repos/asf/spark/blob/c8eb76ba/R/pkg/src-native/Makefile -- diff --git a/R/pkg/src-native/Makefile b/R/pkg/src-native/Makefile new file mode 100644 index 000..a55a56f --- /dev/null +++ b/R/pkg/src-native/Makefile @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license
spark git commit: [SPARK-6806] [SPARKR] [DOCS] Fill in SparkR examples in programming guide
Repository: spark Updated Branches: refs/heads/branch-1.4 b928db4fe -> c636b87dc [SPARK-6806] [SPARKR] [DOCS] Fill in SparkR examples in programming guide sqlCtx -> sqlContext You can check the docs by: ``` $ cd docs $ SKIP_SCALADOC=1 jekyll serve ``` cc shivaram Author: Davies Liu dav...@databricks.com Closes #5442 from davies/r_docs and squashes the following commits: 7a12ec6 [Davies Liu] remove rdd in R docs 8496b26 [Davies Liu] remove the docs related to RDD e23b9d6 [Davies Liu] delete R docs for RDD API 222e4ff [Davies Liu] Merge branch 'master' into r_docs 89684ce [Davies Liu] Merge branch 'r_docs' of github.com:davies/spark into r_docs f0a10e1 [Davies Liu] address comments from @shivaram f61de71 [Davies Liu] Update pairRDD.R 3ef7cf3 [Davies Liu] use "+" instead of "function(a,b) a+b" 2f10a77 [Davies Liu] address comments from @cafreeman 9c2a062 [Davies Liu] mention R api together with Python API 23f751a [Davies Liu] Fill in SparkR examples in programming guide (cherry picked from commit 7af3818c6b2bf35bfa531ab7cc3a4a714385015e) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c636b87d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c636b87d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c636b87d Branch: refs/heads/branch-1.4 Commit: c636b87dc287ce99a887bc59cad31aaf48477a56 Parents: b928db4 Author: Davies Liu dav...@databricks.com Authored: Sat May 23 00:00:30 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sat May 23 00:02:22 2015 -0700 -- R/README.md | 4 +- R/pkg/R/DataFrame.R | 176 R/pkg/R/RDD.R| 2 +- R/pkg/R/SQLContext.R | 165 --- R/pkg/R/pairRDD.R| 4 +- R/pkg/R/sparkR.R | 10 +- R/pkg/inst/profile/shell.R | 6 +- R/pkg/inst/tests/test_sparkSQL.R | 156 +++--- docs/_plugins/copy_api_dirs.rb | 68 --- docs/api.md | 3 +- docs/index.md| 23 ++- docs/programming-guide.md| 21 +- docs/quick-start.md | 18 +- docs/sql-programming-guide.md| 373 +- 14 files changed, 706 insertions(+), 323 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c636b87d/R/README.md -- diff --git a/R/README.md b/R/README.md index a6970e3..d7d65b4 100644 --- a/R/README.md +++ b/R/README.md @@ -52,7 +52,7 @@ The SparkR documentation (Rd files and HTML files) are not a part of the source SparkR comes with several sample programs in the `examples/src/main/r` directory. To run one of them, use `./bin/sparkR <filename> <args>`. For example: -./bin/sparkR examples/src/main/r/pi.R local[2] +./bin/sparkR examples/src/main/r/dataframe.R You can also run the unit-tests for SparkR by running (you need to install the [testthat](http://cran.r-project.org/web/packages/testthat/index.html) package first): @@ -63,5 +63,5 @@ You can also run the unit-tests for SparkR by running (you need to install the [ The `./bin/spark-submit` and `./bin/sparkR` can also be used to submit jobs to YARN clusters. You will need to set YARN conf dir before doing so.
For example on CDH you can run ``` export YARN_CONF_DIR=/etc/hadoop/conf -./bin/spark-submit --master yarn examples/src/main/r/pi.R 4 +./bin/spark-submit --master yarn examples/src/main/r/dataframe.R ``` http://git-wip-us.apache.org/repos/asf/spark/blob/c636b87d/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index a7fa32e..ed8093c 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -65,9 +65,9 @@ dataFrame <- function(sdf, isCached = FALSE) { #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' printSchema(df) #'} setMethod("printSchema", @@ -88,9 +88,9 @@ setMethod("printSchema", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' dfSchema <- schema(df) #'} setMethod("schema", @@ -110,9 +110,9 @@ setMethod("schema", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' explain(df, TRUE) #'} setMethod("explain
spark git commit: [SPARK-6806] [SPARKR] [DOCS] Fill in SparkR examples in programming guide
Repository: spark Updated Branches: refs/heads/master 4583cf4be -> 7af3818c6 [SPARK-6806] [SPARKR] [DOCS] Fill in SparkR examples in programming guide sqlCtx -> sqlContext You can check the docs by: ``` $ cd docs $ SKIP_SCALADOC=1 jekyll serve ``` cc shivaram Author: Davies Liu dav...@databricks.com Closes #5442 from davies/r_docs and squashes the following commits: 7a12ec6 [Davies Liu] remove rdd in R docs 8496b26 [Davies Liu] remove the docs related to RDD e23b9d6 [Davies Liu] delete R docs for RDD API 222e4ff [Davies Liu] Merge branch 'master' into r_docs 89684ce [Davies Liu] Merge branch 'r_docs' of github.com:davies/spark into r_docs f0a10e1 [Davies Liu] address comments from @shivaram f61de71 [Davies Liu] Update pairRDD.R 3ef7cf3 [Davies Liu] use "+" instead of "function(a,b) a+b" 2f10a77 [Davies Liu] address comments from @cafreeman 9c2a062 [Davies Liu] mention R api together with Python API 23f751a [Davies Liu] Fill in SparkR examples in programming guide Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7af3818c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7af3818c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7af3818c Branch: refs/heads/master Commit: 7af3818c6b2bf35bfa531ab7cc3a4a714385015e Parents: 4583cf4 Author: Davies Liu dav...@databricks.com Authored: Sat May 23 00:00:30 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sat May 23 00:01:40 2015 -0700 -- R/README.md | 4 +- R/pkg/R/DataFrame.R | 176 R/pkg/R/RDD.R| 2 +- R/pkg/R/SQLContext.R | 165 --- R/pkg/R/pairRDD.R| 4 +- R/pkg/R/sparkR.R | 10 +- R/pkg/inst/profile/shell.R | 6 +- R/pkg/inst/tests/test_sparkSQL.R | 156 +++--- docs/_plugins/copy_api_dirs.rb | 68 --- docs/api.md | 3 +- docs/index.md| 23 ++- docs/programming-guide.md| 21 +- docs/quick-start.md | 18 +- docs/sql-programming-guide.md| 373 +- 14 files changed, 706 insertions(+), 323 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7af3818c/R/README.md -- diff --git a/R/README.md b/R/README.md index a6970e3..d7d65b4 100644 --- a/R/README.md +++ b/R/README.md @@ -52,7 +52,7 @@ The SparkR documentation (Rd files and HTML files) are not a part of the source SparkR comes with several sample programs in the `examples/src/main/r` directory. To run one of them, use `./bin/sparkR <filename> <args>`. For example: -./bin/sparkR examples/src/main/r/pi.R local[2] +./bin/sparkR examples/src/main/r/dataframe.R You can also run the unit-tests for SparkR by running (you need to install the [testthat](http://cran.r-project.org/web/packages/testthat/index.html) package first): @@ -63,5 +63,5 @@ You can also run the unit-tests for SparkR by running (you need to install the [ The `./bin/spark-submit` and `./bin/sparkR` can also be used to submit jobs to YARN clusters. You will need to set YARN conf dir before doing so.
For example on CDH you can run ``` export YARN_CONF_DIR=/etc/hadoop/conf -./bin/spark-submit --master yarn examples/src/main/r/pi.R 4 +./bin/spark-submit --master yarn examples/src/main/r/dataframe.R ``` http://git-wip-us.apache.org/repos/asf/spark/blob/7af3818c/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index a7fa32e..ed8093c 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -65,9 +65,9 @@ dataFrame <- function(sdf, isCached = FALSE) { #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' printSchema(df) #'} setMethod("printSchema", @@ -88,9 +88,9 @@ setMethod("printSchema", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' dfSchema <- schema(df) #'} setMethod("schema", @@ -110,9 +110,9 @@ setMethod("schema", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' explain(df, TRUE) #'} setMethod("explain", @@ -139,9 +139,9 @@ setMethod("explain", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext
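The updated docs standardize every example on the same bootstrap. A minimal sketch of that pattern against the 1.4-era SparkR API (the JSON path is a placeholder, not a file shipped with the docs):

```r
library(SparkR)

sc <- sparkR.init(master = "local[2]")  # create a SparkContext
sqlContext <- sparkRSQL.init(sc)        # the renamed handle (was sqlCtx)

# Any newline-delimited JSON file works here
df <- jsonFile(sqlContext, "path/to/file.json")
printSchema(df)
head(df)

sparkR.stop()
```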
spark git commit: [SPARK-8821] [EC2] Switched to binary mode for file reading
Repository: spark Updated Branches: refs/heads/master 738c10748 -> 70beb808e [SPARK-8821] [EC2] Switched to binary mode for file reading Otherwise the script will crash with - Downloading boto... Traceback (most recent call last): File "ec2/spark_ec2.py", line 148, in <module> setup_external_libs(external_libs) File "ec2/spark_ec2.py", line 128, in setup_external_libs if hashlib.md5(tar.read()).hexdigest() != lib["md5"]: File "/usr/lib/python3.4/codecs.py", line 319, in decode (result, consumed) = self._buffer_decode(data, self.errors, final) UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte in case of a UTF-8 env setting. Author: Simon Hafner hafnersi...@gmail.com Closes #7215 from reactormonk/branch-1.4 and squashes the following commits: e86957a [Simon Hafner] [SPARK-8821] [EC2] Switched to binary mode (cherry picked from commit 83a621a5a8f8a2991c4cfa687279589e5c623d46) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/70beb808 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/70beb808 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/70beb808 Branch: refs/heads/master Commit: 70beb808e13f6371968ac87f7cf625ed110375e6 Parents: 738c107 Author: Simon Hafner hafnersi...@gmail.com Authored: Tue Jul 7 09:42:59 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Jul 7 09:43:16 2015 -0700 -- ec2/spark_ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/70beb808/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 18ccbc0..8582d43 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -125,7 +125,7 @@ def setup_external_libs(libs): ) with open(tgz_file_path, "wb") as tgz_file: tgz_file.write(download_stream.read()) -with open(tgz_file_path) as tar: +with open(tgz_file_path, "rb") as tar: if hashlib.md5(tar.read()).hexdigest() != lib["md5"]: print("ERROR: Got wrong md5sum for {lib}.".format(lib=lib["name"]), file=stderr) sys.exit(1) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8821] [EC2] Switched to binary mode for file reading
Repository: spark Updated Branches: refs/heads/branch-1.4 bf8b47d17 -> 83a621a5a [SPARK-8821] [EC2] Switched to binary mode for file reading Otherwise the script will crash with - Downloading boto... Traceback (most recent call last): File "ec2/spark_ec2.py", line 148, in <module> setup_external_libs(external_libs) File "ec2/spark_ec2.py", line 128, in setup_external_libs if hashlib.md5(tar.read()).hexdigest() != lib["md5"]: File "/usr/lib/python3.4/codecs.py", line 319, in decode (result, consumed) = self._buffer_decode(data, self.errors, final) UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte in case of a UTF-8 env setting. Author: Simon Hafner hafnersi...@gmail.com Closes #7215 from reactormonk/branch-1.4 and squashes the following commits: e86957a [Simon Hafner] [SPARK-8821] [EC2] Switched to binary mode Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/83a621a5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/83a621a5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/83a621a5 Branch: refs/heads/branch-1.4 Commit: 83a621a5a8f8a2991c4cfa687279589e5c623d46 Parents: bf8b47d Author: Simon Hafner hafnersi...@gmail.com Authored: Tue Jul 7 09:42:59 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Jul 7 09:42:59 2015 -0700 -- ec2/spark_ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/83a621a5/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 05fa47f..91f0a24 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -127,7 +127,7 @@ def setup_external_libs(libs): ) with open(tgz_file_path, "wb") as tgz_file: tgz_file.write(download_stream.read()) -with open(tgz_file_path) as tar: +with open(tgz_file_path, "rb") as tar: if hashlib.md5(tar.read()).hexdigest() != lib["md5"]: print("ERROR: Got wrong md5sum for {lib}.".format(lib=lib["name"]), file=stderr) sys.exit(1) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9318] [SPARK-9320] [SPARKR] Aliases for merge and summary functions on DataFrames
Repository: spark Updated Branches: refs/heads/master 8cb415a4b -> 712f5b7a9 [SPARK-9318] [SPARK-9320] [SPARKR] Aliases for merge and summary functions on DataFrames This PR adds synonyms for ```merge``` and ```summary``` in SparkR DataFrame API. cc shivaram Author: Hossein hoss...@databricks.com Closes #7806 from falaki/SPARK-9320 and squashes the following commits: 72600f7 [Hossein] Updated docs 92a6e75 [Hossein] Fixed merge generic signature issue 4c2b051 [Hossein] Fixing naming with mllib summary 0f3a64c [Hossein] Added ... to generic for merge 30fbaf8 [Hossein] Merged master ae1a4cf [Hossein] Merge branch 'master' into SPARK-9320 e8eb86f [Hossein] Add a generic for merge fc01f2d [Hossein] Added unit test 8d92012 [Hossein] Added merge as an alias for join 5b8bedc [Hossein] Added unit test 632693d [Hossein] Added summary as an alias for describe for DataFrame Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/712f5b7a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/712f5b7a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/712f5b7a Branch: refs/heads/master Commit: 712f5b7a9ab52c26e3d086629633950ec2fb7afc Parents: 8cb415a Author: Hossein hoss...@databricks.com Authored: Fri Jul 31 19:24:00 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jul 31 19:24:44 2015 -0700 -- R/pkg/NAMESPACE | 2 ++ R/pkg/R/DataFrame.R | 22 ++ R/pkg/R/generics.R | 8 R/pkg/R/mllib.R | 8 R/pkg/inst/tests/test_sparkSQL.R | 14 -- 5 files changed, 48 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/712f5b7a/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ff116cb..b2d92bd 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -46,6 +46,7 @@ exportMethods("arrange", "isLocal", "join", + "merge", "names", "ncol", "nrow", @@ -69,6 +70,7 @@ exportMethods("arrange", "show", "showDF", "summarize", + "summary", "take", "unionAll", "unique", http://git-wip-us.apache.org/repos/asf/spark/blob/712f5b7a/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index b4065d2..8956032 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1279,6 +1279,15 @@ setMethod("join", dataFrame(sdf) }) +#' rdname merge +#' aliases join +setMethod("merge", + signature(x = "DataFrame", y = "DataFrame"), + function(x, y, joinExpr = NULL, joinType = NULL, ...) { +join(x, y, joinExpr, joinType) + }) + + #' UnionAll #' #' Return a new DataFrame containing the union of rows in this DataFrame @@ -1524,6 +1533,19 @@ setMethod("describe", dataFrame(sdf) }) +#' @title Summary +#' +#' @description Computes statistics for numeric columns of the DataFrame +#' +#' @rdname summary +#' @aliases describe +setMethod("summary", + signature(x = "DataFrame"), + function(x) { +describe(x) + }) + + #' dropna #' #' Returns a new DataFrame omitting rows with null values. http://git-wip-us.apache.org/repos/asf/spark/blob/712f5b7a/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 71d1e34..c43b947 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -461,6 +461,10 @@ setGeneric("isLocal", function(x) { standardGeneric("isLocal") }) #' @export setGeneric("limit", function(x, num) {standardGeneric("limit") }) +#' rdname merge +#' @export +setGeneric("merge") + #' @rdname withColumn #' @export setGeneric("mutate", function(x, ...) {standardGeneric("mutate") }) @@ -531,6 +535,10 @@ setGeneric("showDF", function(x,...)
{ standardGeneric("showDF") }) #' @export setGeneric("summarize", function(x,...) { standardGeneric("summarize") }) +##' rdname summary +##' @export +setGeneric("summary", function(x, ...) { standardGeneric("summary") }) + # @rdname tojson # @export setGeneric("toJSON", function(x) { standardGeneric("toJSON") }) http://git-wip-us.apache.org/repos/asf/spark/blob/712f5b7a/R/pkg/R/mllib.R -- diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index efddcc1..b524d1f 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -86,12 +86,12 @@ setMethod("predict", signature(object = "PipelineModel"), #' model <- glm(y ~ x, trainingData
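In use, both new aliases simply delegate to the existing verbs. A hedged sketch, assuming an initialized `sqlContext`; the column names are invented for illustration:

```r
df  <- createDataFrame(sqlContext, data.frame(name = c("a", "b"), age = c(1, 2),
                                              stringsAsFactors = FALSE))
df2 <- createDataFrame(sqlContext, data.frame(name = c("a", "c"), height = c(10, 20),
                                              stringsAsFactors = FALSE))

# merge() is now an alias for join(); same joinExpr / joinType arguments
joined <- merge(df, df2, df$name == df2$name, "inner")

# summary() is now an alias for describe()
collect(summary(df))
```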
spark git commit: [SPARK-9855] [SPARKR] Add expression functions into SparkR whose params are simple
Repository: spark Updated Branches: refs/heads/master 0d1d146c2 -> f4bc01f1f [SPARK-9855] [SPARKR] Add expression functions into SparkR whose params are simple I added lots of expression functions for SparkR. This PR includes only functions whose params are only `(Column)` or `(Column, Column)`. And I think we need to improve how to test those functions. However, it would be better to work on another issue. ## Diff Summary - Add lots of functions in `functions.R` and their generic in `generic.R` - Add aliases for `ceiling` and `sign` - Move expression functions from `column.R` to `functions.R` - Modify `rdname` from `column` to `functions` I haven't supported the `not` function, because the name has a collision with the `testthat` package, and I haven't thought of a way to define it. ## New Supported Functions ``` approxCountDistinct ascii base64 bin bitwiseNOT ceil (alias: ceiling) crc32 dayofmonth dayofyear explode factorial hex hour initcap isNaN last_day length log2 ltrim md5 minute month negate quarter reverse round rtrim second sha1 signum (alias: sign) size soundex to_date trim unbase64 unhex weekofyear year datediff levenshtein months_between nanvl pmod ``` ## JIRA [[SPARK-9855] Add expression functions into SparkR whose params are simple - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9855) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8123 from yu-iskw/SPARK-9855. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f4bc01f1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f4bc01f1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f4bc01f1 Branch: refs/heads/master Commit: f4bc01f1f33a93e6affe5c8a3e33ffbd92d03f38 Parents: 0d1d146 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Wed Aug 12 18:33:27 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Aug 12 18:33:27 2015 -0700 -- R/pkg/DESCRIPTION| 1 + R/pkg/R/column.R | 81 --- R/pkg/R/functions.R | 123 ++ R/pkg/R/generics.R | 185 +++--- R/pkg/inst/tests/test_sparkSQL.R | 21 ++-- 5 files changed, 309 insertions(+), 102 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f4bc01f1/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 4949d86..83e6489 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -29,6 +29,7 @@ Collate: 'client.R' 'context.R' 'deserialize.R' +'functions.R' 'mllib.R' 'serialize.R' 'sparkR.R' http://git-wip-us.apache.org/repos/asf/spark/blob/f4bc01f1/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index eeaf9f1..328f595 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -60,12 +60,6 @@ operators <- list( ) column_functions1 <- c("asc", "desc", "isNull", "isNotNull") column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains") -functions <- c("min", "max", "sum", "avg", "mean", "count", "abs", "sqrt", - "first", "last", "lower", "upper", "sumDistinct", - "acos", "asin", "atan", "cbrt", "ceiling", "cos", "cosh", "exp", - "expm1", "floor", "log", "log10", "log1p", "rint", "sign", - "sin", "sinh", "tan", "tanh", "toDegrees", "toRadians") -binary_mathfunctions <- c("atan2", "hypot") createOperator <- function(op) { setMethod(op, @@ -111,33 +105,6 @@ createColumnFunction2 <- function(name) { }) } -createStaticFunction <- function(name) { - setMethod(name, -signature(x = "Column"), -function(x) { - if (name == "ceiling") { - name <- "ceil" - } - if (name == "sign") { - name <- "signum" - } - jc <- callJStatic("org.apache.spark.sql.functions", name, x@jc) - column(jc) -}) -} -createBinaryMathfunctions <-
function(name) { - setMethod(name, -signature(y = "Column"), -function(y, x) { - if (class(x) == "Column") { -x <- x@jc - } - jc <- callJStatic("org.apache.spark.sql.functions", name, y@jc, x) - column(jc) -}) -} - createMethods <- function() { for (op in names(operators)) { createOperator(op) @@ -148,12 +115,6 @@ createMethods <- function() { for (name in column_functions2) { createColumnFunction2(name) } - for (x in functions) { -createStaticFunction(x) - } - for (name in binary_mathfunctions) { -createBinaryMathfunctions(name) - } } createMethods() @@ -242,45 +203,3 @@ setMethod("%in%", jc <- callJMethod(x
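All of the generated wrappers dispatch on Column. A brief sketch using a few of the new functions, assuming an initialized `sqlContext`; the sample data is invented:

```r
df <- createDataFrame(sqlContext,
                      data.frame(name = c("Alice", "Bob"),
                                 ts = c("2015-08-12", "2015-08-13"),
                                 stringsAsFactors = FALSE))

collect(select(df,
               ascii(df$name),    # integer: ascii code of the first character
               md5(df$name),      # string: hex digest of the value
               reverse(df$name),  # string reversed
               to_date(df$ts)))   # string cast to DateType
```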
spark git commit: [SPARK-9855] [SPARKR] Add expression functions into SparkR whose params are simple
Repository: spark Updated Branches: refs/heads/branch-1.5 62ab2a4c6 -> ca39c9e91 [SPARK-9855] [SPARKR] Add expression functions into SparkR whose params are simple I added lots of expression functions for SparkR. This PR includes only functions whose params are only `(Column)` or `(Column, Column)`. And I think we need to improve how to test those functions. However, it would be better to work on another issue. ## Diff Summary - Add lots of functions in `functions.R` and their generic in `generic.R` - Add aliases for `ceiling` and `sign` - Move expression functions from `column.R` to `functions.R` - Modify `rdname` from `column` to `functions` I haven't supported the `not` function, because the name has a collision with the `testthat` package, and I haven't thought of a way to define it. ## New Supported Functions ``` approxCountDistinct ascii base64 bin bitwiseNOT ceil (alias: ceiling) crc32 dayofmonth dayofyear explode factorial hex hour initcap isNaN last_day length log2 ltrim md5 minute month negate quarter reverse round rtrim second sha1 signum (alias: sign) size soundex to_date trim unbase64 unhex weekofyear year datediff levenshtein months_between nanvl pmod ``` ## JIRA [[SPARK-9855] Add expression functions into SparkR whose params are simple - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9855) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8123 from yu-iskw/SPARK-9855. (cherry picked from commit f4bc01f1f33a93e6affe5c8a3e33ffbd92d03f38) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca39c9e9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca39c9e9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca39c9e9 Branch: refs/heads/branch-1.5 Commit: ca39c9e91602223f5665ab6942b917c4900bd996 Parents: 62ab2a4 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Wed Aug 12 18:33:27 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Aug 12 18:33:35 2015 -0700 -- R/pkg/DESCRIPTION| 1 + R/pkg/R/column.R | 81 --- R/pkg/R/functions.R | 123 ++ R/pkg/R/generics.R | 185 +++--- R/pkg/inst/tests/test_sparkSQL.R | 21 ++-- 5 files changed, 309 insertions(+), 102 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ca39c9e9/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 4949d86..83e6489 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -29,6 +29,7 @@ Collate: 'client.R' 'context.R' 'deserialize.R' +'functions.R' 'mllib.R' 'serialize.R' 'sparkR.R' http://git-wip-us.apache.org/repos/asf/spark/blob/ca39c9e9/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index eeaf9f1..328f595 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -60,12 +60,6 @@ operators <- list( ) column_functions1 <- c("asc", "desc", "isNull", "isNotNull") column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains") -functions <- c("min", "max", "sum", "avg", "mean", "count", "abs", "sqrt", - "first", "last", "lower", "upper", "sumDistinct", - "acos", "asin", "atan", "cbrt", "ceiling", "cos", "cosh", "exp", - "expm1", "floor", "log", "log10", "log1p", "rint", "sign", - "sin", "sinh", "tan", "tanh", "toDegrees", "toRadians") -binary_mathfunctions <- c("atan2", "hypot") createOperator <- function(op) { setMethod(op, @@ -111,33 +105,6 @@ createColumnFunction2 <- function(name) { }) } -createStaticFunction <- function(name) { - setMethod(name, -signature(x = "Column"), -function(x) { - if (name == "ceiling") { - name <- "ceil" - } - if (name == "sign") { - name
<- "signum" - } - jc <- callJStatic("org.apache.spark.sql.functions", name, x@jc) - column(jc) -}) -} - -createBinaryMathfunctions <- function(name) { - setMethod(name, -signature(y = "Column"), -function(y, x) { - if (class(x) == "Column") { -x <- x@jc - } - jc <- callJStatic("org.apache.spark.sql.functions", name, y@jc, x) - column(jc) -}) -} - createMethods <- function() { for (op in names(operators)) { createOperator(op) @@ -148,12 +115,6 @@ createMethods <- function() { for (name in column_functions2) { createColumnFunction2(name) } - for (x in functions) { -createStaticFunction(x) - } - for (name
spark git commit: [SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/create commands from codebase
Repository: spark Updated Branches: refs/heads/master d7053bea9 - 2fb4901b7 [SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/create commands from codebase sparkr.zip is now built by SparkSubmit on a need-to-build basis. cc shivaram Author: Burak Yavuz brk...@gmail.com Closes #8147 from brkyvz/make-dist-fix. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2fb4901b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2fb4901b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2fb4901b Branch: refs/heads/master Commit: 2fb4901b71cee65d40a43e61e3f4411c30cdefc3 Parents: d7053be Author: Burak Yavuz brk...@gmail.com Authored: Wed Aug 12 20:59:38 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Aug 12 20:59:38 2015 -0700 -- R/install-dev.bat| 5 - make-distribution.sh | 1 - 2 files changed, 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2fb4901b/R/install-dev.bat -- diff --git a/R/install-dev.bat b/R/install-dev.bat index f32670b..008a5c6 100644 --- a/R/install-dev.bat +++ b/R/install-dev.bat @@ -25,8 +25,3 @@ set SPARK_HOME=%~dp0.. MKDIR %SPARK_HOME%\R\lib R.exe CMD INSTALL --library=%SPARK_HOME%\R\lib %SPARK_HOME%\R\pkg\ - -rem Zip the SparkR package so that it can be distributed to worker nodes on YARN -pushd %SPARK_HOME%\R\lib -%JAVA_HOME%\bin\jar.exe cfM %SPARK_HOME%\R\lib\sparkr.zip SparkR -popd http://git-wip-us.apache.org/repos/asf/spark/blob/2fb4901b/make-distribution.sh -- diff --git a/make-distribution.sh b/make-distribution.sh index 4789b0e..247a813 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -219,7 +219,6 @@ cp -r $SPARK_HOME/ec2 $DISTDIR if [ -d $SPARK_HOME/R/lib/SparkR ]; then mkdir -p $DISTDIR/R/lib cp -r $SPARK_HOME/R/lib/SparkR $DISTDIR/R/lib - cp $SPARK_HOME/R/lib/sparkr.zip $DISTDIR/R/lib fi # Download and copy in tachyon, if requested - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/create commands from codebase
Repository: spark Updated Branches: refs/heads/branch-1.5 af470a757 - 3d1b9f007 [SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/create commands from codebase sparkr.zip is now built by SparkSubmit on a need-to-build basis. cc shivaram Author: Burak Yavuz brk...@gmail.com Closes #8147 from brkyvz/make-dist-fix. (cherry picked from commit 2fb4901b71cee65d40a43e61e3f4411c30cdefc3) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3d1b9f00 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3d1b9f00 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3d1b9f00 Branch: refs/heads/branch-1.5 Commit: 3d1b9f007b9b6a9bb4e146de32bd34affa723e12 Parents: af470a7 Author: Burak Yavuz brk...@gmail.com Authored: Wed Aug 12 20:59:38 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Aug 12 20:59:47 2015 -0700 -- R/install-dev.bat| 5 - make-distribution.sh | 1 - 2 files changed, 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3d1b9f00/R/install-dev.bat -- diff --git a/R/install-dev.bat b/R/install-dev.bat index f32670b..008a5c6 100644 --- a/R/install-dev.bat +++ b/R/install-dev.bat @@ -25,8 +25,3 @@ set SPARK_HOME=%~dp0.. MKDIR %SPARK_HOME%\R\lib R.exe CMD INSTALL --library=%SPARK_HOME%\R\lib %SPARK_HOME%\R\pkg\ - -rem Zip the SparkR package so that it can be distributed to worker nodes on YARN -pushd %SPARK_HOME%\R\lib -%JAVA_HOME%\bin\jar.exe cfM %SPARK_HOME%\R\lib\sparkr.zip SparkR -popd http://git-wip-us.apache.org/repos/asf/spark/blob/3d1b9f00/make-distribution.sh -- diff --git a/make-distribution.sh b/make-distribution.sh index 8589255..04ad005 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -219,7 +219,6 @@ cp -r $SPARK_HOME/ec2 $DISTDIR if [ -d $SPARK_HOME/R/lib/SparkR ]; then mkdir -p $DISTDIR/R/lib cp -r $SPARK_HOME/R/lib/SparkR $DISTDIR/R/lib - cp $SPARK_HOME/R/lib/sparkr.zip $DISTDIR/R/lib fi # Download and copy in tachyon, if requested - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8844] [SPARKR] head/collect is broken in SparkR.
Repository: spark Updated Branches: refs/heads/master 182f9b7a6 -> 5f9ce738f [SPARK-8844] [SPARKR] head/collect is broken in SparkR. This is a WIP patch for SPARK-8844 for collecting reviews. This bug is about reading an empty DataFrame. In readCol(), `lapply(1:numRows, function(x) {...})` does not take into consideration the case where numRows = 0. Will add unit test case. Author: Sun Rui rui@intel.com Closes #7419 from sun-rui/SPARK-8844. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5f9ce738 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5f9ce738 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5f9ce738 Branch: refs/heads/master Commit: 5f9ce738fe6bab3f0caffad0df1d3876178cf469 Parents: 182f9b7 Author: Sun Rui rui@intel.com Authored: Sun Aug 16 00:30:02 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sun Aug 16 00:30:02 2015 -0700 -- R/pkg/R/deserialize.R| 16 ++-- R/pkg/inst/tests/test_sparkSQL.R | 20 2 files changed, 30 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5f9ce738/R/pkg/R/deserialize.R -- diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index 6d364f7..33bf13e 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -176,10 +176,14 @@ readRow <- function(inputCon) { # Take a single column as Array[Byte] and deserialize it into an atomic vector readCol <- function(inputCon, numRows) { - # sapply can not work with POSIXlt - do.call(c, lapply(1:numRows, function(x) { -value <- readObject(inputCon) -# Replace NULL with NA so we can coerce to vectors -if (is.null(value)) NA else value - })) + if (numRows > 0) { +# sapply can not work with POSIXlt +do.call(c, lapply(1:numRows, function(x) { + value <- readObject(inputCon) + # Replace NULL with NA so we can coerce to vectors + if (is.null(value)) NA else value +})) + } else { +vector() + } } http://git-wip-us.apache.org/repos/asf/spark/blob/5f9ce738/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index e6d3b21..c77f633 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -408,6 +408,14 @@ test_that("collect() returns a data.frame", { expect_equal(names(rdf)[1], "age") expect_equal(nrow(rdf), 3) expect_equal(ncol(rdf), 2) + + # collect() returns data correctly from a DataFrame with 0 row + df0 <- limit(df, 0) + rdf <- collect(df0) + expect_true(is.data.frame(rdf)) + expect_equal(names(rdf)[1], "age") + expect_equal(nrow(rdf), 0) + expect_equal(ncol(rdf), 2) }) test_that("limit() returns DataFrame with the correct number of rows", { @@ -492,6 +500,18 @@ test_that("head() and first() return the correct data", { testFirst <- first(df) expect_equal(nrow(testFirst), 1) + + # head() and first() return the correct data on + # a DataFrame with 0 row + df0 <- limit(df, 0) + + testHead <- head(df0) + expect_equal(nrow(testHead), 0) + expect_equal(ncol(testHead), 2) + + testFirst <- first(df0) + expect_equal(nrow(testFirst), 0) + expect_equal(ncol(testFirst), 2) }) test_that("distinct() and unique on DataFrames", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8844] [SPARKR] head/collect is broken in SparkR.
Repository: spark Updated Branches: refs/heads/branch-1.5 881baf100 -> 4f75ce2e1 [SPARK-8844] [SPARKR] head/collect is broken in SparkR. This is a WIP patch for SPARK-8844 for collecting reviews. This bug is about reading an empty DataFrame. In readCol(), `lapply(1:numRows, function(x) {...})` does not take into consideration the case where numRows = 0. Will add unit test case. Author: Sun Rui rui@intel.com Closes #7419 from sun-rui/SPARK-8844. (cherry picked from commit 5f9ce738fe6bab3f0caffad0df1d3876178cf469) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f75ce2e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f75ce2e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f75ce2e Branch: refs/heads/branch-1.5 Commit: 4f75ce2e193c813f4e3ad067749b6e7b4f0ee135 Parents: 881baf1 Author: Sun Rui rui@intel.com Authored: Sun Aug 16 00:30:02 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Sun Aug 16 00:30:10 2015 -0700 -- R/pkg/R/deserialize.R| 16 ++-- R/pkg/inst/tests/test_sparkSQL.R | 20 2 files changed, 30 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4f75ce2e/R/pkg/R/deserialize.R -- diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index 6d364f7..33bf13e 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -176,10 +176,14 @@ readRow <- function(inputCon) { # Take a single column as Array[Byte] and deserialize it into an atomic vector readCol <- function(inputCon, numRows) { - # sapply can not work with POSIXlt - do.call(c, lapply(1:numRows, function(x) { -value <- readObject(inputCon) -# Replace NULL with NA so we can coerce to vectors -if (is.null(value)) NA else value - })) + if (numRows > 0) { +# sapply can not work with POSIXlt +do.call(c, lapply(1:numRows, function(x) { + value <- readObject(inputCon) + # Replace NULL with NA so we can coerce to vectors + if (is.null(value)) NA else value +})) + } else { +vector() + } } http://git-wip-us.apache.org/repos/asf/spark/blob/4f75ce2e/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index e6d3b21..c77f633 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -408,6 +408,14 @@ test_that("collect() returns a data.frame", { expect_equal(names(rdf)[1], "age") expect_equal(nrow(rdf), 3) expect_equal(ncol(rdf), 2) + + # collect() returns data correctly from a DataFrame with 0 row + df0 <- limit(df, 0) + rdf <- collect(df0) + expect_true(is.data.frame(rdf)) + expect_equal(names(rdf)[1], "age") + expect_equal(nrow(rdf), 0) + expect_equal(ncol(rdf), 2) }) test_that("limit() returns DataFrame with the correct number of rows", { @@ -492,6 +500,18 @@ test_that("head() and first() return the correct data", { testFirst <- first(df) expect_equal(nrow(testFirst), 1) + + # head() and first() return the correct data on + # a DataFrame with 0 row + df0 <- limit(df, 0) + + testHead <- head(df0) + expect_equal(nrow(testHead), 0) + expect_equal(ncol(testHead), 2) + + testFirst <- first(df0) + expect_equal(nrow(testFirst), 0) + expect_equal(ncol(testFirst), 2) }) test_that("distinct() and unique on DataFrames", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
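The underlying gotcha is that `1:numRows` counts down when `numRows` is 0, so the supposedly empty loop body still runs. A two-line illustration of why the patch guards with `if (numRows > 0)`:

```r
numRows <- 0

length(1:numRows)         # 2 -- `1:0` counts down to c(1, 0), so lapply() runs twice
length(seq_len(numRows))  # 0 -- the usual idiom for "iterate numRows times"

# The patch takes the explicit-guard route instead, returning vector()
# (an empty atomic vector) for a zero-row column.
```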
spark git commit: [SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are complicated
Repository: spark Updated Branches: refs/heads/master f3e177917 - 2fcb9cb95 [SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are complicated I added lots of Column functinos into SparkR. And I also added `rand(seed: Int)` and `randn(seed: Int)` in Scala. Since we need such APIs for R integer type. ### JIRA [[SPARK-9856] Add expression functions into SparkR whose params are complicated - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9856) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8264 from yu-iskw/SPARK-9856-3. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2fcb9cb9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2fcb9cb9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2fcb9cb9 Branch: refs/heads/master Commit: 2fcb9cb9552dac1d78dcca5d4d5032b4fa6c985c Parents: f3e1779 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Wed Aug 19 10:41:14 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Aug 19 10:41:14 2015 -0700 -- R/pkg/NAMESPACE | 28 ++ R/pkg/R/functions.R | 415 +++ R/pkg/R/generics.R | 113 + R/pkg/inst/tests/test_sparkSQL.R| 98 - .../apache/spark/api/r/RBackendHandler.scala| 1 + 5 files changed, 649 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2fcb9cb9/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 8fa12d5..111a2dc 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -84,6 +84,7 @@ exportClasses(Column) exportMethods(abs, acos, + add_months, alias, approxCountDistinct, asc, @@ -101,12 +102,17 @@ exportMethods(abs, ceil, ceiling, concat, + concat_ws, contains, + conv, cos, cosh, count, countDistinct, crc32, + date_add, + date_format, + date_sub, datediff, dayofmonth, dayofyear, @@ -115,9 +121,14 @@ exportMethods(abs, exp, explode, expm1, + expr, factorial, first, floor, + format_number, + format_string, + from_unixtime, + from_utc_timestamp, getField, getItem, greatest, @@ -125,6 +136,7 @@ exportMethods(abs, hour, hypot, initcap, + instr, isNaN, isNotNull, isNull, @@ -135,11 +147,13 @@ exportMethods(abs, levenshtein, like, lit, + locate, log, log10, log1p, log2, lower, + lpad, ltrim, max, md5, @@ -152,16 +166,26 @@ exportMethods(abs, n_distinct, nanvl, negate, + next_day, otherwise, pmod, quarter, + rand, + randn, + regexp_extract, + regexp_replace, reverse, rint, rlike, round, + rpad, rtrim, second, sha1, + sha2, + shiftLeft, + shiftRight, + shiftRightUnsigned, sign, signum, sin, @@ -171,6 +195,7 @@ exportMethods(abs, sqrt, startsWith, substr, + substring_index, sum, sumDistinct, tan, @@ -178,9 +203,12 @@ exportMethods(abs, toDegrees, toRadians, to_date, + to_utc_timestamp, + translate, trim, unbase64, unhex, + unix_timestamp, upper, weekofyear, when, http://git-wip-us.apache.org/repos/asf/spark/blob/2fcb9cb9/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 366c230..5dba088 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -166,6 +166,421 @@ setMethod(n, signature(x
spark git commit: [SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are complicated
Repository: spark Updated Branches: refs/heads/branch-1.5 bebe63dfe - a8e880818 [SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are complicated I added lots of Column functinos into SparkR. And I also added `rand(seed: Int)` and `randn(seed: Int)` in Scala. Since we need such APIs for R integer type. ### JIRA [[SPARK-9856] Add expression functions into SparkR whose params are complicated - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9856) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8264 from yu-iskw/SPARK-9856-3. (cherry picked from commit 2fcb9cb9552dac1d78dcca5d4d5032b4fa6c985c) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a8e88081 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a8e88081 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a8e88081 Branch: refs/heads/branch-1.5 Commit: a8e8808181eec19f34783943ebb42cb8feb0e639 Parents: bebe63d Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Wed Aug 19 10:41:14 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Aug 19 10:41:22 2015 -0700 -- R/pkg/NAMESPACE | 28 ++ R/pkg/R/functions.R | 415 +++ R/pkg/R/generics.R | 113 + R/pkg/inst/tests/test_sparkSQL.R| 98 - .../apache/spark/api/r/RBackendHandler.scala| 1 + 5 files changed, 649 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a8e88081/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 8fa12d5..111a2dc 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -84,6 +84,7 @@ exportClasses(Column) exportMethods(abs, acos, + add_months, alias, approxCountDistinct, asc, @@ -101,12 +102,17 @@ exportMethods(abs, ceil, ceiling, concat, + concat_ws, contains, + conv, cos, cosh, count, countDistinct, crc32, + date_add, + date_format, + date_sub, datediff, dayofmonth, dayofyear, @@ -115,9 +121,14 @@ exportMethods(abs, exp, explode, expm1, + expr, factorial, first, floor, + format_number, + format_string, + from_unixtime, + from_utc_timestamp, getField, getItem, greatest, @@ -125,6 +136,7 @@ exportMethods(abs, hour, hypot, initcap, + instr, isNaN, isNotNull, isNull, @@ -135,11 +147,13 @@ exportMethods(abs, levenshtein, like, lit, + locate, log, log10, log1p, log2, lower, + lpad, ltrim, max, md5, @@ -152,16 +166,26 @@ exportMethods(abs, n_distinct, nanvl, negate, + next_day, otherwise, pmod, quarter, + rand, + randn, + regexp_extract, + regexp_replace, reverse, rint, rlike, round, + rpad, rtrim, second, sha1, + sha2, + shiftLeft, + shiftRight, + shiftRightUnsigned, sign, signum, sin, @@ -171,6 +195,7 @@ exportMethods(abs, sqrt, startsWith, substr, + substring_index, sum, sumDistinct, tan, @@ -178,9 +203,12 @@ exportMethods(abs, toDegrees, toRadians, to_date, + to_utc_timestamp, + translate, trim, unbase64, unhex, + unix_timestamp, upper, weekofyear, when, http://git-wip-us.apache.org/repos/asf/spark/blob/a8e88081/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R
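A short sketch of a few of the new wrappers with non-trivial parameters, assuming an initialized `sqlContext`; the sample data and format string are invented, and `rand(11L)` exercises the new `rand(seed: Int)` overload the commit mentions:

```r
df <- createDataFrame(sqlContext,
                      data.frame(t = c(0, 1439817600),
                                 s = c("a,b", "c,d"),
                                 stringsAsFactors = FALSE))

collect(select(df,
               from_unixtime(df$t, "yyyy-MM-dd"),  # takes a format-string parameter
               concat_ws("-", df$s, df$s),         # the separator comes first
               rand(11L)))                         # seeded random column, R integer seed
```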
spark git commit: [SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR
Repository: spark Updated Branches: refs/heads/master 28a98464e - d898c33f7 [SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR ### JIRA [[SPARK-10106] Add `ifelse` Column function to SparkR - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10106) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8303 from yu-iskw/SPARK-10106. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d898c33f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d898c33f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d898c33f Branch: refs/heads/master Commit: d898c33f774b9a3db2fb6aa8f0cb2c2ac6004b58 Parents: 28a9846 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Wed Aug 19 12:39:37 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Aug 19 12:39:37 2015 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 19 +++ R/pkg/inst/tests/test_sparkSQL.R | 3 ++- 3 files changed, 22 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d898c33f/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 111a2dc..3e5c89d 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -135,6 +135,7 @@ exportMethods(abs, hex, hour, hypot, + ifelse, initcap, instr, isNaN, http://git-wip-us.apache.org/repos/asf/spark/blob/d898c33f/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 5dba088..b5879bd 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -594,3 +594,22 @@ setMethod(when, signature(condition = Column, value = ANY), jc - callJStatic(org.apache.spark.sql.functions, when, condition, value) column(jc) }) + +#' ifelse +#' +#' Evaluates a list of conditions and returns `yes` if the conditions are satisfied. +#' Otherwise `no` is returned for unmatched conditions. +#' +#' @rdname column +setMethod(ifelse, + signature(test = Column, yes = ANY, no = ANY), + function(test, yes, no) { + test - test@jc + yes - ifelse(class(yes) == Column, yes@jc, yes) + no - ifelse(class(no) == Column, no@jc, no) + jc - callJMethod(callJStatic(org.apache.spark.sql.functions, +when, +test, yes), +otherwise, no) + column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/d898c33f/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 670017e..556b8c5 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -813,11 +813,12 @@ test_that(greatest() and least() on a DataFrame, { expect_equal(collect(select(df, least(df$a, df$b)))[, 1], c(1, 3)) }) -test_that(when() and otherwise() on a DataFrame, { +test_that(when(), otherwise() and ifelse() on a DataFrame, { l - list(list(a = 1, b = 2), list(a = 3, b = 4)) df - createDataFrame(sqlContext, l) expect_equal(collect(select(df, when(df$a 1 df$b 2, 1)))[, 1], c(NA, 1)) expect_equal(collect(select(df, otherwise(when(df$a 1, 1), 0)))[, 1], c(0, 1)) + expect_equal(collect(select(df, ifelse(df$a 1 df$b 2, 0, 1)))[, 1], c(1, 0)) }) test_that(group by, { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
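To make the behaviour concrete, here is a short usage sketch of the new `ifelse` Column method, mirroring the test case added in this diff; the `sqlContext` setup is assumed:

```
df <- createDataFrame(sqlContext, data.frame(a = c(1, 3), b = c(2, 4)))
# Behaves like base R's ifelse(), but builds a Column expression that Spark
# evaluates row by row: the yes-value where the test holds, the no-value otherwise.
collect(select(df, ifelse(df$a > 1 & df$b > 2, 0, 1)))   # returns c(1, 0)
```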
spark git commit: [SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR
Repository: spark Updated Branches: refs/heads/branch-1.5 f25c32475 - ba369258d [SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR ### JIRA [[SPARK-10106] Add `ifelse` Column function to SparkR - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10106) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8303 from yu-iskw/SPARK-10106. (cherry picked from commit d898c33f774b9a3db2fb6aa8f0cb2c2ac6004b58) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ba369258 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ba369258 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ba369258 Branch: refs/heads/branch-1.5 Commit: ba369258d94ba09b0bfc15d17f6851aa72a4d6d7 Parents: f25c324 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Wed Aug 19 12:39:37 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Wed Aug 19 12:39:44 2015 -0700 -- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 19 +++ R/pkg/inst/tests/test_sparkSQL.R | 3 ++- 3 files changed, 22 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ba369258/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 111a2dc..3e5c89d 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -135,6 +135,7 @@ exportMethods(abs, hex, hour, hypot, + ifelse, initcap, instr, isNaN, http://git-wip-us.apache.org/repos/asf/spark/blob/ba369258/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 5dba088..b5879bd 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -594,3 +594,22 @@ setMethod(when, signature(condition = Column, value = ANY), jc - callJStatic(org.apache.spark.sql.functions, when, condition, value) column(jc) }) + +#' ifelse +#' +#' Evaluates a list of conditions and returns `yes` if the conditions are satisfied. +#' Otherwise `no` is returned for unmatched conditions. +#' +#' @rdname column +setMethod(ifelse, + signature(test = Column, yes = ANY, no = ANY), + function(test, yes, no) { + test - test@jc + yes - ifelse(class(yes) == Column, yes@jc, yes) + no - ifelse(class(no) == Column, no@jc, no) + jc - callJMethod(callJStatic(org.apache.spark.sql.functions, +when, +test, yes), +otherwise, no) + column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/ba369258/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 670017e..556b8c5 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -813,11 +813,12 @@ test_that(greatest() and least() on a DataFrame, { expect_equal(collect(select(df, least(df$a, df$b)))[, 1], c(1, 3)) }) -test_that(when() and otherwise() on a DataFrame, { +test_that(when(), otherwise() and ifelse() on a DataFrame, { l - list(list(a = 1, b = 2), list(a = 3, b = 4)) df - createDataFrame(sqlContext, l) expect_equal(collect(select(df, when(df$a 1 df$b 2, 1)))[, 1], c(NA, 1)) expect_equal(collect(select(df, otherwise(when(df$a 1, 1), 0)))[, 1], c(0, 1)) + expect_equal(collect(select(df, ifelse(df$a 1 df$b 2, 0, 1)))[, 1], c(1, 0)) }) test_that(group by, { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR] [MINOR] Get rid of a long line warning
Repository: spark Updated Branches: refs/heads/branch-1.5 9b42e2404 - 0a1385e31 [SPARKR] [MINOR] Get rid of a long line warning ``` R/functions.R:74:1: style: lines should not be more than 100 characters. jc - callJStatic(org.apache.spark.sql.functions, lit, ifelse(class(x) == Column, x@jc, x)) ^ ``` Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8297 from yu-iskw/minor-lint-r. (cherry picked from commit b4b35f133aecaf84f04e8e444b660a33c6b7894a) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0a1385e3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0a1385e3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0a1385e3 Branch: refs/heads/branch-1.5 Commit: 0a1385e319a2bca115b6bfefe7820b78ce5fb753 Parents: 9b42e24 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Tue Aug 18 19:18:05 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Aug 18 19:18:13 2015 -0700 -- R/pkg/R/functions.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0a1385e3/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 6eef4d6..e606b20 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -71,7 +71,9 @@ createFunctions() #' @return Creates a Column class of literal value. setMethod(lit, signature(ANY), function(x) { -jc - callJStatic(org.apache.spark.sql.functions, lit, ifelse(class(x) == Column, x@jc, x)) +jc - callJStatic(org.apache.spark.sql.functions, + lit, + ifelse(class(x) == Column, x@jc, x)) column(jc) }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR] [MINOR] Get rid of a long line warning
Repository: spark Updated Branches: refs/heads/master 1f8902964 - b4b35f133 [SPARKR] [MINOR] Get rid of a long line warning ``` R/functions.R:74:1: style: lines should not be more than 100 characters. jc - callJStatic(org.apache.spark.sql.functions, lit, ifelse(class(x) == Column, x@jc, x)) ^ ``` Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8297 from yu-iskw/minor-lint-r. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b4b35f13 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b4b35f13 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b4b35f13 Branch: refs/heads/master Commit: b4b35f133aecaf84f04e8e444b660a33c6b7894a Parents: 1f89029 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Tue Aug 18 19:18:05 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Aug 18 19:18:05 2015 -0700 -- R/pkg/R/functions.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b4b35f13/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 6eef4d6..e606b20 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -71,7 +71,9 @@ createFunctions() #' @return Creates a Column class of literal value. setMethod(lit, signature(ANY), function(x) { -jc - callJStatic(org.apache.spark.sql.functions, lit, ifelse(class(x) == Column, x@jc, x)) +jc - callJStatic(org.apache.spark.sql.functions, + lit, + ifelse(class(x) == Column, x@jc, x)) column(jc) }) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
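As a side note, a small sketch of what the reformatted `lit` does in practice; the data frame here is an assumption for illustration:

```
df <- createDataFrame(sqlContext, data.frame(x = c(1, 2)))
# lit() wraps an R literal as a Column (or unwraps a Column's Java reference),
# so constants can appear inside column expressions:
collect(select(df, df$x + lit(10)))   # adds the constant 10 to every row
```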
spark git commit: [SPARK-10075] [SPARKR] Add `when` expression function in SparkR
Repository: spark Updated Branches: refs/heads/master a5b5b9365 - bf32c1f7f [SPARK-10075] [SPARKR] Add `when` expression function in SparkR - Add `when` and `otherwise` as `Column` methods - Add `When` as an expression function - Add `%otherwise%` infix as an alias of `otherwise` Since R doesn't support a feature like method chaining, the `otherwise(when(condition, value), value)` style is a little annoying for me. If `%otherwise%` looks strange to shivaram, I can remove it. What do you think? ### JIRA [[SPARK-10075] Add `when` expression function in SparkR - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10075) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8266 from yu-iskw/SPARK-10075. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bf32c1f7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bf32c1f7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bf32c1f7 Branch: refs/heads/master Commit: bf32c1f7f47dd907d787469f979c5859e02ce5e6 Parents: a5b5b93 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Tue Aug 18 20:27:36 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Aug 18 20:27:36 2015 -0700 -- R/pkg/NAMESPACE | 2 ++ R/pkg/R/column.R | 14 ++ R/pkg/R/functions.R | 14 ++ R/pkg/R/generics.R | 8 R/pkg/inst/tests/test_sparkSQL.R | 7 +++ 5 files changed, 45 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 607aef2..8fa12d5 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -152,6 +152,7 @@ exportMethods(abs, n_distinct, nanvl, negate, + otherwise, pmod, quarter, reverse, @@ -182,6 +183,7 @@ exportMethods(abs, unhex, upper, weekofyear, + when, year) exportClasses(GroupedData) http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 328f595..5a07ebd 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -203,3 +203,17 @@ setMethod(%in%, jc - callJMethod(x@jc, in, table) return(column(jc)) }) + +#' otherwise +#' +#' If values in the specified column are null, returns the value. +#' Can be used in conjunction with `when` to specify a default value for expressions. +#' +#' @rdname column +setMethod(otherwise, + signature(x = Column, value = ANY), + function(x, value) { +value - ifelse(class(value) == Column, value@jc, value) +jc - callJMethod(x@jc, otherwise, value) +column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index e606b20..366c230 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -165,3 +165,17 @@ setMethod(n, signature(x = Column), function(x) { count(x) }) + +#' when +#' +#' Evaluates a list of conditions and returns one of multiple possible result expressions. +#' For unmatched expressions null is returned. +#' +#' @rdname column +setMethod(when, signature(condition = Column, value = ANY), + function(condition, value) { + condition - condition@jc + value - ifelse(class(value) == Column, value@jc, value) + jc - callJStatic(org.apache.spark.sql.functions, when, condition, value) + column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 5c1cc98..338b32e 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -651,6 +651,14 @@ setGeneric(rlike, function(x, ...)
{ standardGeneric(rlike) }) #' @export setGeneric(startsWith, function(x, ...) { standardGeneric(startsWith) }) +#' @rdname column +#' @export +setGeneric(when, function(condition, value) { standardGeneric(when) }) + +#' @rdname column +#' @export +setGeneric(otherwise, function(x, value) { standardGeneric(otherwise) }) + ## Expression Function Methods ## http://git-wip-us.apache.org/repos/asf/spark/blob/bf32c1f7/R/pkg/inst/tests/test_sparkSQL.R
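A short usage sketch of the two new methods, matching the tests added in this diff; the session setup is assumed:

```
df <- createDataFrame(sqlContext, data.frame(a = c(1, 3)))
# when() alone leaves unmatched rows as null (NA once collected) ...
collect(select(df, when(df$a > 1, 1)))                 # c(NA, 1)
# ... while otherwise() supplies the default for those rows.
collect(select(df, otherwise(when(df$a > 1, 1), 0)))   # c(0, 1)
```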
spark git commit: [SPARK-10075] [SPARKR] Add `when` expression function in SparkR
Repository: spark Updated Branches: refs/heads/branch-1.5 bb2fb59f9 - ebaeb1892 [SPARK-10075] [SPARKR] Add `when` expression function in SparkR - Add `when` and `otherwise` as `Column` methods - Add `When` as an expression function - Add `%otherwise%` infix as an alias of `otherwise` Since R doesn't support a feature like method chaining, the `otherwise(when(condition, value), value)` style is a little annoying for me. If `%otherwise%` looks strange to shivaram, I can remove it. What do you think? ### JIRA [[SPARK-10075] Add `when` expression function in SparkR - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10075) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #8266 from yu-iskw/SPARK-10075. (cherry picked from commit bf32c1f7f47dd907d787469f979c5859e02ce5e6) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ebaeb189 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ebaeb189 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ebaeb189 Branch: refs/heads/branch-1.5 Commit: ebaeb189260dd338fc5a91d8ec3ff6d45989991a Parents: bb2fb59 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Tue Aug 18 20:27:36 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Aug 18 20:29:34 2015 -0700 -- R/pkg/NAMESPACE | 2 ++ R/pkg/R/column.R | 14 ++ R/pkg/R/functions.R | 14 ++ R/pkg/R/generics.R | 8 R/pkg/inst/tests/test_sparkSQL.R | 7 +++ 5 files changed, 45 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 607aef2..8fa12d5 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -152,6 +152,7 @@ exportMethods(abs, n_distinct, nanvl, negate, + otherwise, pmod, quarter, reverse, @@ -182,6 +183,7 @@ exportMethods(abs, unhex, upper, weekofyear, + when, year) exportClasses(GroupedData) http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 328f595..5a07ebd 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -203,3 +203,17 @@ setMethod(%in%, jc - callJMethod(x@jc, in, table) return(column(jc)) }) + +#' otherwise +#' +#' If values in the specified column are null, returns the value. +#' Can be used in conjunction with `when` to specify a default value for expressions. +#' +#' @rdname column +setMethod(otherwise, + signature(x = Column, value = ANY), + function(x, value) { +value - ifelse(class(value) == Column, value@jc, value) +jc - callJMethod(x@jc, otherwise, value) +column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index e606b20..366c230 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -165,3 +165,17 @@ setMethod(n, signature(x = Column), function(x) { count(x) }) + +#' when +#' +#' Evaluates a list of conditions and returns one of multiple possible result expressions. +#' For unmatched expressions null is returned. 
+#' +#' @rdname column +setMethod(when, signature(condition = Column, value = ANY), + function(condition, value) { + condition - condition@jc + value - ifelse(class(value) == Column, value@jc, value) + jc - callJStatic(org.apache.spark.sql.functions, when, condition, value) + column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/ebaeb189/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 5c1cc98..338b32e 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -651,6 +651,14 @@ setGeneric(rlike, function(x, ...) { standardGeneric(rlike) }) #' @export setGeneric(startsWith, function(x, ...) { standardGeneric(startsWith) }) +#' @rdname column +#' @export +setGeneric(when, function(condition, value) { standardGeneric(when) }) + +#' @rdname column +#' @export +setGeneric(otherwise, function(x, value) { standardGeneric(otherwise) }) + ## Expression Function Methods
spark git commit: [SPARK-9982] [SPARKR] SparkR DataFrame fails to return data of Decimal type
Repository: spark Updated Branches: refs/heads/branch-1.5 257e9d727 - a7027e6d3 [SPARK-9982] [SPARKR] SparkR DataFrame fails to return data of Decimal type Author: Alex Shkurenko ashkure...@enova.com Closes #8239 from ashkurenko/master. (cherry picked from commit 39e91fe2fd43044cc734d55625a3c03284b69f09) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a7027e6d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a7027e6d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a7027e6d Branch: refs/heads/branch-1.5 Commit: a7027e6d3369a1157c53557c8215273606086d84 Parents: 257e9d7 Author: Alex Shkurenko ashkure...@enova.com Authored: Thu Aug 20 10:16:38 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Thu Aug 20 10:16:57 2015 -0700 -- core/src/main/scala/org/apache/spark/api/r/SerDe.scala | 5 + 1 file changed, 5 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a7027e6d/core/src/main/scala/org/apache/spark/api/r/SerDe.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala index d5b4260..3c89f24 100644 --- a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala +++ b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala @@ -181,6 +181,7 @@ private[spark] object SerDe { // Boolean - logical // Float - double // Double - double + // Decimal - double // Long - double // Array[Byte] - raw // Date - Date @@ -219,6 +220,10 @@ private[spark] object SerDe { case float | java.lang.Float = writeType(dos, double) writeDouble(dos, value.asInstanceOf[Float].toDouble) +case decimal | java.math.BigDecimal = + writeType(dos, double) + val javaDecimal = value.asInstanceOf[java.math.BigDecimal] + writeDouble(dos, scala.math.BigDecimal(javaDecimal).toDouble) case double | java.lang.Double = writeType(dos, double) writeDouble(dos, value.asInstanceOf[Double]) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9982] [SPARKR] SparkR DataFrame fails to return data of Decimal type
Repository: spark Updated Branches: refs/heads/master 52c60537a - 39e91fe2f [SPARK-9982] [SPARKR] SparkR DataFrame fails to return data of Decimal type Author: Alex Shkurenko ashkure...@enova.com Closes #8239 from ashkurenko/master. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/39e91fe2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/39e91fe2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/39e91fe2 Branch: refs/heads/master Commit: 39e91fe2fd43044cc734d55625a3c03284b69f09 Parents: 52c6053 Author: Alex Shkurenko ashkure...@enova.com Authored: Thu Aug 20 10:16:38 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Thu Aug 20 10:16:38 2015 -0700 -- core/src/main/scala/org/apache/spark/api/r/SerDe.scala | 5 + 1 file changed, 5 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/39e91fe2/core/src/main/scala/org/apache/spark/api/r/SerDe.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala index d5b4260..3c89f24 100644 --- a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala +++ b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala @@ -181,6 +181,7 @@ private[spark] object SerDe { // Boolean - logical // Float - double // Double - double + // Decimal - double // Long - double // Array[Byte] - raw // Date - Date @@ -219,6 +220,10 @@ private[spark] object SerDe { case float | java.lang.Float = writeType(dos, double) writeDouble(dos, value.asInstanceOf[Float].toDouble) +case decimal | java.math.BigDecimal = + writeType(dos, double) + val javaDecimal = value.asInstanceOf[java.math.BigDecimal] + writeDouble(dos, scala.math.BigDecimal(javaDecimal).toDouble) case double | java.lang.Double = writeType(dos, double) writeDouble(dos, value.asInstanceOf[Double]) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
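Seen from the R side, the effect of the SerDe change is that a DECIMAL column now collects as an ordinary double instead of failing. A hedged sketch; the query is illustrative only:

```
df <- sql(sqlContext, "SELECT CAST(1.23 AS DECIMAL(10, 2)) AS d")
res <- collect(df)
class(res$d)   # "numeric": the JVM java.math.BigDecimal is written back as an R double
res$d          # 1.23
```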
spark git commit: Bump SparkR version string to 1.5.0
Repository: spark Updated Branches: refs/heads/master badf7fa65 - 04e0fea79 Bump SparkR version string to 1.5.0 This patch is against master, but we need to apply it to 1.5 branch as well. cc shivaram and rxin Author: Hossein hoss...@databricks.com Closes #8291 from falaki/SparkRVersion1.5. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/04e0fea7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/04e0fea7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/04e0fea7 Branch: refs/heads/master Commit: 04e0fea79b9acfa3a3cb81dbacb08f9d287b42c3 Parents: badf7fa Author: Hossein hoss...@databricks.com Authored: Tue Aug 18 18:02:22 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Aug 18 18:02:22 2015 -0700 -- R/pkg/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/04e0fea7/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 83e6489..d0d7201 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,7 +1,7 @@ Package: SparkR Type: Package Title: R frontend for Spark -Version: 1.4.0 +Version: 1.5.0 Date: 2013-09-09 Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman shiva...@cs.berkeley.edu - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Bump SparkR version string to 1.5.0
Repository: spark Updated Branches: refs/heads/branch-1.5 4ee225af8 - 9b42e2404 Bump SparkR version string to 1.5.0 This patch is against master, but we need to apply it to 1.5 branch as well. cc shivaram and rxin Author: Hossein hoss...@databricks.com Closes #8291 from falaki/SparkRVersion1.5. (cherry picked from commit 04e0fea79b9acfa3a3cb81dbacb08f9d287b42c3) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9b42e240 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9b42e240 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9b42e240 Branch: refs/heads/branch-1.5 Commit: 9b42e24049e072b315ec80e5bbe2ec5079a94704 Parents: 4ee225a Author: Hossein hoss...@databricks.com Authored: Tue Aug 18 18:02:22 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Aug 18 18:02:31 2015 -0700 -- R/pkg/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9b42e240/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 83e6489..d0d7201 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,7 +1,7 @@ Package: SparkR Type: Package Title: R frontend for Spark -Version: 1.4.0 +Version: 1.5.0 Date: 2013-09-09 Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman shiva...@cs.berkeley.edu - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple parameters functions
Repository: spark Updated Branches: refs/heads/master 5723d26d7 - 1968276af [SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple parameters functions ### JIRA [[SPARK-10007] Update `NAMESPACE` file in SparkR for simple parameters functions - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10007) Author: Yuu ISHIKAWA yuu.ishik...@gmail.com Closes #8277 from yu-iskw/SPARK-10007. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1968276a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1968276a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1968276a Branch: refs/heads/master Commit: 1968276af0f681fe51328b7dd795bd21724a5441 Parents: 5723d26 Author: Yuu ISHIKAWA yuu.ishik...@gmail.com Authored: Tue Aug 18 09:10:59 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Aug 18 09:10:59 2015 -0700 -- R/pkg/NAMESPACE | 50 +++--- 1 file changed, 47 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1968276a/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index fd9dfdf..607aef2 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -87,48 +87,86 @@ exportMethods(abs, alias, approxCountDistinct, asc, + ascii, asin, atan, atan2, avg, + base64, between, + bin, + bitwiseNOT, cast, cbrt, + ceil, ceiling, + concat, contains, cos, cosh, - concat, + count, countDistinct, + crc32, + datediff, + dayofmonth, + dayofyear, desc, endsWith, exp, + explode, expm1, + factorial, + first, floor, getField, getItem, greatest, + hex, + hour, hypot, + initcap, + isNaN, isNotNull, isNull, - lit, last, + last_day, least, + length, + levenshtein, like, + lit, log, log10, log1p, + log2, lower, + ltrim, max, + md5, mean, min, + minute, + month, + months_between, n, n_distinct, + nanvl, + negate, + pmod, + quarter, + reverse, rint, rlike, + round, + rtrim, + second, + sha1, sign, + signum, sin, sinh, + size, + soundex, sqrt, startsWith, substr, @@ -138,7 +176,13 @@ exportMethods(abs, tanh, toDegrees, toRadians, - upper) + to_date, + trim, + unbase64, + unhex, + upper, + weekofyear, + year) exportClasses(GroupedData) exportMethods(agg) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
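With the export list updated, these previously-hidden generics become callable directly from a SparkR session. A small illustrative sketch under assumed data (note `md5` relies on Spark's implicit string-to-binary cast):

```
df <- createDataFrame(sqlContext,
                      data.frame(s = "Spark", n = -1, stringsAsFactors = FALSE))
collect(select(df,
               md5(df$s),       # hex digest of the string column
               initcap(df$s),   # capitalize the first letter of each word
               negate(df$n),    # arithmetic negation, i.e. 1
               signum(df$n)))   # sign of the value, i.e. -1
```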
spark git commit: [SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple parameters functions
Repository: spark Updated Branches: refs/heads/branch-1.5 a512250cd - 20a760a00 [SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple parameters functions ### JIRA [[SPARK-10007] Update `NAMESPACE` file in SparkR for simple parameters functions - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-10007) Author: Yuu ISHIKAWA yuu.ishik...@gmail.com Closes #8277 from yu-iskw/SPARK-10007. (cherry picked from commit 1968276af0f681fe51328b7dd795bd21724a5441) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/20a760a0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/20a760a0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/20a760a0 Branch: refs/heads/branch-1.5 Commit: 20a760a00ae188a68b877f052842834e8b7570e6 Parents: a512250 Author: Yuu ISHIKAWA yuu.ishik...@gmail.com Authored: Tue Aug 18 09:10:59 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Tue Aug 18 09:11:22 2015 -0700 -- R/pkg/NAMESPACE | 50 +++--- 1 file changed, 47 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/20a760a0/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index fd9dfdf..607aef2 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -87,48 +87,86 @@ exportMethods(abs, alias, approxCountDistinct, asc, + ascii, asin, atan, atan2, avg, + base64, between, + bin, + bitwiseNOT, cast, cbrt, + ceil, ceiling, + concat, contains, cos, cosh, - concat, + count, countDistinct, + crc32, + datediff, + dayofmonth, + dayofyear, desc, endsWith, exp, + explode, expm1, + factorial, + first, floor, getField, getItem, greatest, + hex, + hour, hypot, + initcap, + isNaN, isNotNull, isNull, - lit, last, + last_day, least, + length, + levenshtein, like, + lit, log, log10, log1p, + log2, lower, + ltrim, max, + md5, mean, min, + minute, + month, + months_between, n, n_distinct, + nanvl, + negate, + pmod, + quarter, + reverse, rint, rlike, + round, + rtrim, + second, + sha1, sign, + signum, sin, sinh, + size, + soundex, sqrt, startsWith, substr, @@ -138,7 +176,13 @@ exportMethods(abs, tanh, toDegrees, toRadians, - upper) + to_date, + trim, + unbase64, + unhex, + upper, + weekofyear, + year) exportClasses(GroupedData) exportMethods(agg) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9053] [SPARKR] Fix spaces around parens, infix operators etc.
Repository: spark Updated Branches: refs/heads/master 6bba7509a - fc0e57e5a [SPARK-9053] [SPARKR] Fix spaces around parens, infix operators etc. ### JIRA [[SPARK-9053] Fix spaces around parens, infix operators etc. - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9053) ### The Result of `lint-r` [The result of lint-r at the revision:a4c83cb1e4b066cd60264b6572fd3e51d160d26a](https://gist.github.com/yu-iskw/d253d7f8ef351f86443d) Author: Yu ISHIKAWA yuu.ishik...@gmail.com Closes #7584 from yu-iskw/SPARK-9053 and squashes the following commits: 613170f [Yu ISHIKAWA] Ignore a warning about a space before a left parenthesis ede61e1 [Yu ISHIKAWA] Ignores two warnings about a space before a left parenthesis. TODO: After updating `lintr`, we will remove the ignores de3e0db [Yu ISHIKAWA] Add '## nolint start' '## nolint end' statement to ignore infix space warnings e233ea8 [Yu ISHIKAWA] [SPARK-9053][SparkR] Fix spaces around parens, infix operators etc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc0e57e5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc0e57e5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc0e57e5 Branch: refs/heads/master Commit: fc0e57e5aba82a3f227fef05a843283e2ec893fc Parents: 6bba750 Author: Yu ISHIKAWA yuu.ishik...@gmail.com Authored: Fri Jul 31 09:33:38 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jul 31 09:33:38 2015 -0700 -- R/pkg/R/DataFrame.R | 4 R/pkg/R/RDD.R | 7 +-- R/pkg/R/column.R| 2 +- R/pkg/R/context.R | 2 +- R/pkg/R/pairRDD.R | 2 +- R/pkg/R/utils.R | 4 ++-- R/pkg/inst/tests/test_binary_function.R | 2 +- R/pkg/inst/tests/test_rdd.R | 6 +++--- R/pkg/inst/tests/test_sparkSQL.R| 4 +++- 9 files changed, 21 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fc0e57e5/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index f4c93d3..b31ad37 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1322,9 +1322,11 @@ setMethod(write.df, org.apache.spark.sql.parquet) } allModes - c(append, overwrite, error, ignore) +# nolint start if (!(mode %in% allModes)) { stop('mode should be one of append, overwrite, error, ignore') } +# nolint end jmode - callJStatic(org.apache.spark.sql.api.r.SQLUtils, saveMode, mode) options - varargsToEnv(...) if (!is.null(path)) { @@ -1384,9 +1386,11 @@ setMethod(saveAsTable, org.apache.spark.sql.parquet) } allModes - c(append, overwrite, error, ignore) +# nolint start if (!(mode %in% allModes)) { stop('mode should be one of append, overwrite, error, ignore') } +# nolint end jmode - callJStatic(org.apache.spark.sql.api.r.SQLUtils, saveMode, mode) options - varargsToEnv(...)
callJMethod(df@sdf, saveAsTable, tableName, source, jmode, options) http://git-wip-us.apache.org/repos/asf/spark/blob/fc0e57e5/R/pkg/R/RDD.R -- diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index d2d0967..2a013b3 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -85,7 +85,9 @@ setMethod(initialize, PipelinedRDD, function(.Object, prev, func, jrdd_val) isPipelinable - function(rdd) { e - rdd@env +# nolint start !(e$isCached || e$isCheckpointed) +# nolint end } if (!inherits(prev, PipelinedRDD) || !isPipelinable(prev)) { @@ -97,7 +99,8 @@ setMethod(initialize, PipelinedRDD, function(.Object, prev, func, jrdd_val) # prev_serializedMode is used during the delayed computation of JRDD in getJRDD } else { pipelinedFunc - function(partIndex, part) { - func(partIndex, prev@func(partIndex, part)) + f - prev@func + func(partIndex, f(partIndex, part)) } .Object@func - cleanClosure(pipelinedFunc) .Object@prev_jrdd - prev@prev_jrdd # maintain the pipeline @@ -841,7 +844,7 @@ setMethod(sampleRDD, if (withReplacement) { count - rpois(1, fraction) if (count 0) { -res[(len + 1):(len + count)] - rep(list(elem), count) +res[ (len + 1) : (len + count) ] - rep(list(elem), count) len - len + count
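For readers unfamiliar with the `lintr` escape hatch used in this diff: statements wrapped between the two marker comments are skipped by the linter. A minimal standalone sketch; the surrounding function is hypothetical:

```
check_mode <- function(mode) {
  allModes <- c("append", "overwrite", "error", "ignore")
  # nolint start
  if (!(mode %in% allModes)) {
    stop('mode should be one of "append", "overwrite", "error", "ignore"')
  }
  # nolint end
  mode
}
```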