[2/2] spark git commit: [SPARK-6824] Fill the docs for DataFrame API in SparkR

shivaram Fri, 08 May 2015 11:26:52 -0700

[SPARK-6824] Fill the docs for DataFrame API in SparkR

This patch also stops the RDD docs from being built as a part of roxygen, simply
by deleting the "'" from the "#'" comment prefix, so that those roxygen comments
become plain "#" comments.


Author: hqzizania <[email protected]>
Author: qhuang <[email protected]>

Closes #5969 from hqzizania/R1 and squashes the following commits:

6d27696 [qhuang] fixes in NAMESPACE
eb4b095 [qhuang] remove more docs
6394579 [qhuang] remove RDD docs in generics.R
6813860 [hqzizania] Fill the docs for DataFrame API in SparkR
857220f [hqzizania] remove the pairRDD docs from being built as a part of 
roxygen
c045d64 [hqzizania] remove the RDD docs from being built as a part of roxygen

(cherry picked from commit 008a60dd371e76819d8e08ab638cac7b3a48c9fc)
Signed-off-by: Shivaram Venkataraman <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f01f5b5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f01f5b5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f01f5b5

Branch: refs/heads/branch-1.4
Commit: 4f01f5b563819e2ce7d3ac7ea86162b4e76935a3
Parents: 75fed0c
Author: hqzizania <[email protected]>
Authored: Fri May 8 11:25:04 2015 -0700
Committer: Shivaram Venkataraman <[email protected]>
Committed: Fri May 8 11:25:20 2015 -0700

----------------------------------------------------------------------
 R/pkg/DESCRIPTION    |    2 +-
 R/pkg/NAMESPACE      |    4 -
 R/pkg/R/DataFrame.R  |   95 +--
 R/pkg/R/RDD.R        | 1546 ++++++++++++++++++++++-----------------------
 R/pkg/R/SQLContext.R |   64 +-
 R/pkg/R/broadcast.R  |   64 +-
 R/pkg/R/context.R    |  240 +++----
 R/pkg/R/generics.R   |  318 +++++-----
 R/pkg/R/pairRDD.R    |  886 +++++++++++++-------------
 9 files changed, 1610 insertions(+), 1609 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4f01f5b5/R/pkg/DESCRIPTION
----------------------------------------------------------------------
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 1c1779a..efc85bb 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -15,11 +15,11 @@ Suggests:
 Description: R frontend for Spark
 License: Apache License (== 2.0)
 Collate:
+    'schema.R'
     'generics.R'
     'jobj.R'
     'RDD.R'
     'pairRDD.R'
-    'schema.R'
     'column.R'
     'group.R'
     'DataFrame.R'

http://git-wip-us.apache.org/repos/asf/spark/blob/4f01f5b5/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 3fb92be..7611f47 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -26,7 +26,6 @@ exportMethods("cache",
               "intersect",
               "isLocal",
               "join",
-              "length",
               "limit",
               "orderBy",
               "names",
@@ -101,9 +100,6 @@ export("cacheTable",
        "tables",
        "uncacheTable")
 
-export("sparkRSQL.init",
-       "sparkRHive.init")
-
 export("structField",
        "structField.jobj",
        "structField.character",

http://git-wip-us.apache.org/repos/asf/spark/blob/4f01f5b5/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 47d92f1..354642e 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -45,6 +45,9 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, 
isCached) {
 
 #' @rdname DataFrame
 #' @export
+#'
+#' @param sdf A Java object reference to the backing Scala DataFrame
+#' @param isCached TRUE if the dataFrame is cached
 dataFrame <- function(sdf, isCached = FALSE) {
   new("DataFrame", sdf, isCached)
 }
@@ -244,7 +247,7 @@ setMethod("columns",
           })
 
 #' @rdname columns
-#' @export
+#' @aliases names,DataFrame,function-method
 setMethod("names",
           signature(x = "DataFrame"),
           function(x) {
@@ -399,23 +402,23 @@ setMethod("repartition",
             dataFrame(sdf)     
           })
 
-#' toJSON
-#'
-#' Convert the rows of a DataFrame into JSON objects and return an RDD where
-#' each element contains a JSON string.
-#'
-#' @param x A SparkSQL DataFrame
-#' @return A StringRRDD of JSON objects
-#' @rdname tojson
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlCtx, path)
-#' newRDD <- toJSON(df)
-#'}
+# toJSON
+#
+# Convert the rows of a DataFrame into JSON objects and return an RDD where
+# each element contains a JSON string.
+#
+#@param x A SparkSQL DataFrame
+# @return A StringRRDD of JSON objects
+# @rdname tojson
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# path <- "path/to/file.json"
+# df <- jsonFile(sqlCtx, path)
+# newRDD <- toJSON(df)
+#}
 setMethod("toJSON",
           signature(x = "DataFrame"),
           function(x) {
@@ -578,8 +581,8 @@ setMethod("limit",
             dataFrame(res)
           })
 
-# Take the first NUM rows of a DataFrame and return a the results as a 
data.frame
-
+#' Take the first NUM rows of a DataFrame and return a the results as a 
data.frame
+#' 
 #' @rdname take
 #' @export
 #' @examples
@@ -644,22 +647,22 @@ setMethod("first",
             take(x, 1)
           })
 
-#' toRDD()
-#' 
-#' Converts a Spark DataFrame to an RDD while preserving column names.
-#' 
-#' @param x A Spark DataFrame
-#' 
-#' @rdname DataFrame
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlCtx, path)
-#' rdd <- toRDD(df)
-#' }
+# toRDD()
+# 
+# Converts a Spark DataFrame to an RDD while preserving column names.
+# 
+# @param x A Spark DataFrame
+# 
+# @rdname DataFrame
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# path <- "path/to/file.json"
+# df <- jsonFile(sqlCtx, path)
+# rdd <- toRDD(df)
+# }
 setMethod("toRDD",
           signature(x = "DataFrame"),
           function(x) {
@@ -706,6 +709,7 @@ setMethod("groupBy",
 #'
 #' Compute aggregates by specifying a list of columns
 #'
+#' @param x a DataFrame
 #' @rdname DataFrame
 #' @export
 setMethod("agg",
@@ -721,7 +725,7 @@ setMethod("agg",
 # the requested map function.                                                  
   #
 
###################################################################################
 
-#' @rdname lapply
+# @rdname lapply
 setMethod("lapply",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
@@ -729,14 +733,14 @@ setMethod("lapply",
             lapply(rdd, FUN)
           })
 
-#' @rdname lapply
+# @rdname lapply
 setMethod("map",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
             lapply(X, FUN)
           })
 
-#' @rdname flatMap
+# @rdname flatMap
 setMethod("flatMap",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
@@ -744,7 +748,7 @@ setMethod("flatMap",
             flatMap(rdd, FUN)
           })
 
-#' @rdname lapplyPartition
+# @rdname lapplyPartition
 setMethod("lapplyPartition",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
@@ -752,14 +756,14 @@ setMethod("lapplyPartition",
             lapplyPartition(rdd, FUN)
           })
 
-#' @rdname lapplyPartition
+# @rdname lapplyPartition
 setMethod("mapPartitions",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
             lapplyPartition(X, FUN)
           })
 
-#' @rdname foreach
+# @rdname foreach
 setMethod("foreach",
           signature(x = "DataFrame", func = "function"),
           function(x, func) {
@@ -767,7 +771,7 @@ setMethod("foreach",
             foreach(rdd, func)
           })
 
-#' @rdname foreach
+# @rdname foreach
 setMethod("foreachPartition",
           signature(x = "DataFrame", func = "function"),
           function(x, func) {
@@ -788,6 +792,7 @@ setMethod("$", signature(x = "DataFrame"),
             getColumn(x, name)
           })
 
+#' @rdname select
 setMethod("$<-", signature(x = "DataFrame"),
           function(x, name, value) {
             stopifnot(class(value) == "Column" || is.null(value))
@@ -1009,7 +1014,7 @@ setMethod("sortDF",
           })
 
 #' @rdname sortDF
-#' @export
+#' @aliases orderBy,DataFrame,function-method
 setMethod("orderBy",
           signature(x = "DataFrame", col = "characterOrColumn"),
           function(x, col) {
@@ -1046,7 +1051,7 @@ setMethod("filter",
           })
 
 #' @rdname filter
-#' @export
+#' @aliases where,DataFrame,function-method
 setMethod("where",
           signature(x = "DataFrame", condition = "characterOrColumn"),
           function(x, condition) {

http://git-wip-us.apache.org/repos/asf/spark/blob/4f01f5b5/R/pkg/R/RDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index d1018c2..73999a6 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -19,16 +19,16 @@
 
 setOldClass("jobj")
 
-#' @title S4 class that represents an RDD
-#' @description RDD can be created using functions like
-#'              \code{parallelize}, \code{textFile} etc.
-#' @rdname RDD
-#' @seealso parallelize, textFile
-#'
-#' @slot env An R environment that stores bookkeeping states of the RDD
-#' @slot jrdd Java object reference to the backing JavaRDD
-#' to an RDD
-#' @export
+# @title S4 class that represents an RDD
+# @description RDD can be created using functions like
+#              \code{parallelize}, \code{textFile} etc.
+# @rdname RDD
+# @seealso parallelize, textFile
+#
+# @slot env An R environment that stores bookkeeping states of the RDD
+# @slot jrdd Java object reference to the backing JavaRDD
+# to an RDD
+# @export
 setClass("RDD",
          slots = list(env = "environment",
                       jrdd = "jobj"))
@@ -108,14 +108,14 @@ setMethod("initialize", "PipelinedRDD", function(.Object, 
prev, func, jrdd_val)
   .Object
 })
 
-#' @rdname RDD
-#' @export
-#'
-#' @param jrdd Java object reference to the backing JavaRDD
-#' @param serializedMode Use "byte" if the RDD stores data serialized in R, 
"string" if the RDD
-#' stores strings, and "row" if the RDD stores the rows of a DataFrame
-#' @param isCached TRUE if the RDD is cached
-#' @param isCheckpointed TRUE if the RDD has been checkpointed
+# @rdname RDD
+# @export
+#
+# @param jrdd Java object reference to the backing JavaRDD
+# @param serializedMode Use "byte" if the RDD stores data serialized in R, 
"string" if the RDD
+# stores strings, and "row" if the RDD stores the rows of a DataFrame
+# @param isCached TRUE if the RDD is cached
+# @param isCheckpointed TRUE if the RDD has been checkpointed
 RDD <- function(jrdd, serializedMode = "byte", isCached = FALSE,
                 isCheckpointed = FALSE) {
   new("RDD", jrdd, serializedMode, isCached, isCheckpointed)
@@ -200,19 +200,19 @@ setValidity("RDD",
 
 ############ Actions and Transformations ############
 
-#' Persist an RDD
-#'
-#' Persist this RDD with the default storage level (MEMORY_ONLY).
-#'
-#' @param x The RDD to cache
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10, 2L)
-#' cache(rdd)
-#'}
-#' @rdname cache-methods
-#' @aliases cache,RDD-method
+# Persist an RDD
+#
+# Persist this RDD with the default storage level (MEMORY_ONLY).
+#
+# @param x The RDD to cache
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10, 2L)
+# cache(rdd)
+#}
+# @rdname cache-methods
+# @aliases cache,RDD-method
 setMethod("cache",
           signature(x = "RDD"),
           function(x) {
@@ -221,22 +221,22 @@ setMethod("cache",
             x
           })
 
-#' Persist an RDD
-#'
-#' Persist this RDD with the specified storage level. For details of the
-#' supported storage levels, refer to
-#' http://spark.apache.org/docs/latest/programming-guide.html#rdd-persistence.
-#'
-#' @param x The RDD to persist
-#' @param newLevel The new storage level to be assigned
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10, 2L)
-#' persist(rdd, "MEMORY_AND_DISK")
-#'}
-#' @rdname persist
-#' @aliases persist,RDD-method
+# Persist an RDD
+#
+# Persist this RDD with the specified storage level. For details of the
+# supported storage levels, refer to
+# http://spark.apache.org/docs/latest/programming-guide.html#rdd-persistence.
+#
+# @param x The RDD to persist
+# @param newLevel The new storage level to be assigned
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10, 2L)
+# persist(rdd, "MEMORY_AND_DISK")
+#}
+# @rdname persist
+# @aliases persist,RDD-method
 setMethod("persist",
           signature(x = "RDD", newLevel = "character"),
           function(x, newLevel) {
@@ -245,21 +245,21 @@ setMethod("persist",
             x
           })
 
-#' Unpersist an RDD
-#'
-#' Mark the RDD as non-persistent, and remove all blocks for it from memory and
-#' disk.
-#'
-#' @param x The RDD to unpersist
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10, 2L)
-#' cache(rdd) # rdd@@env$isCached == TRUE
-#' unpersist(rdd) # rdd@@env$isCached == FALSE
-#'}
-#' @rdname unpersist-methods
-#' @aliases unpersist,RDD-method
+# Unpersist an RDD
+#
+# Mark the RDD as non-persistent, and remove all blocks for it from memory and
+# disk.
+#
+# @param x The RDD to unpersist
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10, 2L)
+# cache(rdd) # rdd@@env$isCached == TRUE
+# unpersist(rdd) # rdd@@env$isCached == FALSE
+#}
+# @rdname unpersist-methods
+# @aliases unpersist,RDD-method
 setMethod("unpersist",
           signature(x = "RDD"),
           function(x) {
@@ -268,24 +268,24 @@ setMethod("unpersist",
             x
           })
 
-#' Checkpoint an RDD
-#'
-#' Mark this RDD for checkpointing. It will be saved to a file inside the
-#' checkpoint directory set with setCheckpointDir() and all references to its
-#' parent RDDs will be removed. This function must be called before any job has
-#' been executed on this RDD. It is strongly recommended that this RDD is
-#' persisted in memory, otherwise saving it on a file will require 
recomputation.
-#'
-#' @param x The RDD to checkpoint
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' setCheckpointDir(sc, "checkpoint")
-#' rdd <- parallelize(sc, 1:10, 2L)
-#' checkpoint(rdd)
-#'}
-#' @rdname checkpoint-methods
-#' @aliases checkpoint,RDD-method
+# Checkpoint an RDD
+#
+# Mark this RDD for checkpointing. It will be saved to a file inside the
+# checkpoint directory set with setCheckpointDir() and all references to its
+# parent RDDs will be removed. This function must be called before any job has
+# been executed on this RDD. It is strongly recommended that this RDD is
+# persisted in memory, otherwise saving it on a file will require 
recomputation.
+#
+# @param x The RDD to checkpoint
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# setCheckpointDir(sc, "checkpoint")
+# rdd <- parallelize(sc, 1:10, 2L)
+# checkpoint(rdd)
+#}
+# @rdname checkpoint-methods
+# @aliases checkpoint,RDD-method
 setMethod("checkpoint",
           signature(x = "RDD"),
           function(x) {
@@ -295,18 +295,18 @@ setMethod("checkpoint",
             x
           })
 
-#' Gets the number of partitions of an RDD
-#'
-#' @param x A RDD.
-#' @return the number of partitions of rdd as an integer.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10, 2L)
-#' numPartitions(rdd)  # 2L
-#'}
-#' @rdname numPartitions
-#' @aliases numPartitions,RDD-method
+# Gets the number of partitions of an RDD
+#
+# @param x A RDD.
+# @return the number of partitions of rdd as an integer.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10, 2L)
+# numPartitions(rdd)  # 2L
+#}
+# @rdname numPartitions
+# @aliases numPartitions,RDD-method
 setMethod("numPartitions",
           signature(x = "RDD"),
           function(x) {
@@ -315,24 +315,24 @@ setMethod("numPartitions",
             callJMethod(partitions, "size")
           })
 
-#' Collect elements of an RDD
-#'
-#' @description
-#' \code{collect} returns a list that contains all of the elements in this RDD.
-#'
-#' @param x The RDD to collect
-#' @param ... Other optional arguments to collect
-#' @param flatten FALSE if the list should not flattened
-#' @return a list containing elements in the RDD
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10, 2L)
-#' collect(rdd) # list from 1 to 10
-#' collectPartition(rdd, 0L) # list from 1 to 5
-#'}
-#' @rdname collect-methods
-#' @aliases collect,RDD-method
+# Collect elements of an RDD
+#
+# @description
+# \code{collect} returns a list that contains all of the elements in this RDD.
+#
+# @param x The RDD to collect
+# @param ... Other optional arguments to collect
+# @param flatten FALSE if the list should not flattened
+# @return a list containing elements in the RDD
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10, 2L)
+# collect(rdd) # list from 1 to 10
+# collectPartition(rdd, 0L) # list from 1 to 5
+#}
+# @rdname collect-methods
+# @aliases collect,RDD-method
 setMethod("collect",
           signature(x = "RDD"),
           function(x, flatten = TRUE) {
@@ -343,12 +343,12 @@ setMethod("collect",
           })
 
 
-#' @description
-#' \code{collectPartition} returns a list that contains all of the elements
-#' in the specified partition of the RDD.
-#' @param partitionId the partition to collect (starts from 0)
-#' @rdname collect-methods
-#' @aliases collectPartition,integer,RDD-method
+# @description
+# \code{collectPartition} returns a list that contains all of the elements
+# in the specified partition of the RDD.
+# @param partitionId the partition to collect (starts from 0)
+# @rdname collect-methods
+# @aliases collectPartition,integer,RDD-method
 setMethod("collectPartition",
           signature(x = "RDD", partitionId = "integer"),
           function(x, partitionId) {
@@ -361,17 +361,17 @@ setMethod("collectPartition",
               serializedMode = getSerializedMode(x))
           })
 
-#' @description
-#' \code{collectAsMap} returns a named list as a map that contains all of the 
elements
-#' in a key-value pair RDD. 
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(list(1, 2), list(3, 4)), 2L)
-#' collectAsMap(rdd) # list(`1` = 2, `3` = 4)
-#'}
-#' @rdname collect-methods
-#' @aliases collectAsMap,RDD-method
+# @description
+# \code{collectAsMap} returns a named list as a map that contains all of the 
elements
+# in a key-value pair RDD. 
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(list(1, 2), list(3, 4)), 2L)
+# collectAsMap(rdd) # list(`1` = 2, `3` = 4)
+#}
+# @rdname collect-methods
+# @aliases collectAsMap,RDD-method
 setMethod("collectAsMap",
           signature(x = "RDD"),
           function(x) {
@@ -381,19 +381,19 @@ setMethod("collectAsMap",
             as.list(map)
           })
 
-#' Return the number of elements in the RDD.
-#'
-#' @param x The RDD to count
-#' @return number of elements in the RDD.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' count(rdd) # 10
-#' length(rdd) # Same as count
-#'}
-#' @rdname count
-#' @aliases count,RDD-method
+# Return the number of elements in the RDD.
+#
+# @param x The RDD to count
+# @return number of elements in the RDD.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# count(rdd) # 10
+# length(rdd) # Same as count
+#}
+# @rdname count
+# @aliases count,RDD-method
 setMethod("count",
           signature(x = "RDD"),
           function(x) {
@@ -405,31 +405,31 @@ setMethod("count",
             sum(as.integer(vals))
           })
 
-#' Return the number of elements in the RDD
-#' @export
-#' @rdname count
+# Return the number of elements in the RDD
+# @export
+# @rdname count
 setMethod("length",
           signature(x = "RDD"),
           function(x) {
             count(x)
           })
 
-#' Return the count of each unique value in this RDD as a list of
-#' (value, count) pairs.
-#'
-#' Same as countByValue in Spark.
-#'
-#' @param x The RDD to count
-#' @return list of (value, count) pairs, where count is number of each unique
-#' value in rdd.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, c(1,2,3,2,1))
-#' countByValue(rdd) # (1,2L), (2,2L), (3,1L)
-#'}
-#' @rdname countByValue
-#' @aliases countByValue,RDD-method
+# Return the count of each unique value in this RDD as a list of
+# (value, count) pairs.
+#
+# Same as countByValue in Spark.
+#
+# @param x The RDD to count
+# @return list of (value, count) pairs, where count is number of each unique
+# value in rdd.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, c(1,2,3,2,1))
+# countByValue(rdd) # (1,2L), (2,2L), (3,1L)
+#}
+# @rdname countByValue
+# @aliases countByValue,RDD-method
 setMethod("countByValue",
           signature(x = "RDD"),
           function(x) {
@@ -437,23 +437,23 @@ setMethod("countByValue",
             collect(reduceByKey(ones, `+`, numPartitions(x)))
           })
 
-#' Apply a function to all elements
-#'
-#' This function creates a new RDD by applying the given transformation to all
-#' elements of the given RDD
-#'
-#' @param X The RDD to apply the transformation.
-#' @param FUN the transformation to apply on each element
-#' @return a new RDD created by the transformation.
-#' @rdname lapply
-#' @aliases lapply
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' multiplyByTwo <- lapply(rdd, function(x) { x * 2 })
-#' collect(multiplyByTwo) # 2,4,6...
-#'}
+# Apply a function to all elements
+#
+# This function creates a new RDD by applying the given transformation to all
+# elements of the given RDD
+#
+# @param X The RDD to apply the transformation.
+# @param FUN the transformation to apply on each element
+# @return a new RDD created by the transformation.
+# @rdname lapply
+# @aliases lapply
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# multiplyByTwo <- lapply(rdd, function(x) { x * 2 })
+# collect(multiplyByTwo) # 2,4,6...
+#}
 setMethod("lapply",
           signature(X = "RDD", FUN = "function"),
           function(X, FUN) {
@@ -463,31 +463,31 @@ setMethod("lapply",
             lapplyPartitionsWithIndex(X, func)
           })
 
-#' @rdname lapply
-#' @aliases map,RDD,function-method
+# @rdname lapply
+# @aliases map,RDD,function-method
 setMethod("map",
           signature(X = "RDD", FUN = "function"),
           function(X, FUN) {
             lapply(X, FUN)
           })
 
-#' Flatten results after apply a function to all elements
-#'
-#' This function return a new RDD by first applying a function to all
-#' elements of this RDD, and then flattening the results.
-#'
-#' @param X The RDD to apply the transformation.
-#' @param FUN the transformation to apply on each element
-#' @return a new RDD created by the transformation.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' multiplyByTwo <- flatMap(rdd, function(x) { list(x*2, x*10) })
-#' collect(multiplyByTwo) # 2,20,4,40,6,60...
-#'}
-#' @rdname flatMap
-#' @aliases flatMap,RDD,function-method
+# Flatten results after apply a function to all elements
+#
+# This function return a new RDD by first applying a function to all
+# elements of this RDD, and then flattening the results.
+#
+# @param X The RDD to apply the transformation.
+# @param FUN the transformation to apply on each element
+# @return a new RDD created by the transformation.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# multiplyByTwo <- flatMap(rdd, function(x) { list(x*2, x*10) })
+# collect(multiplyByTwo) # 2,20,4,40,6,60...
+#}
+# @rdname flatMap
+# @aliases flatMap,RDD,function-method
 setMethod("flatMap",
           signature(X = "RDD", FUN = "function"),
           function(X, FUN) {
@@ -500,83 +500,83 @@ setMethod("flatMap",
             lapplyPartition(X, partitionFunc)
           })
 
-#' Apply a function to each partition of an RDD
-#'
-#' Return a new RDD by applying a function to each partition of this RDD.
-#'
-#' @param X The RDD to apply the transformation.
-#' @param FUN the transformation to apply on each partition.
-#' @return a new RDD created by the transformation.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' partitionSum <- lapplyPartition(rdd, function(part) { Reduce("+", part) })
-#' collect(partitionSum) # 15, 40
-#'}
-#' @rdname lapplyPartition
-#' @aliases lapplyPartition,RDD,function-method
+# Apply a function to each partition of an RDD
+#
+# Return a new RDD by applying a function to each partition of this RDD.
+#
+# @param X The RDD to apply the transformation.
+# @param FUN the transformation to apply on each partition.
+# @return a new RDD created by the transformation.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# partitionSum <- lapplyPartition(rdd, function(part) { Reduce("+", part) })
+# collect(partitionSum) # 15, 40
+#}
+# @rdname lapplyPartition
+# @aliases lapplyPartition,RDD,function-method
 setMethod("lapplyPartition",
           signature(X = "RDD", FUN = "function"),
           function(X, FUN) {
             lapplyPartitionsWithIndex(X, function(s, part) { FUN(part) })
           })
 
-#' mapPartitions is the same as lapplyPartition.
-#'
-#' @rdname lapplyPartition
-#' @aliases mapPartitions,RDD,function-method
+# mapPartitions is the same as lapplyPartition.
+#
+# @rdname lapplyPartition
+# @aliases mapPartitions,RDD,function-method
 setMethod("mapPartitions",
           signature(X = "RDD", FUN = "function"),
           function(X, FUN) {
             lapplyPartition(X, FUN)
           })
 
-#' Return a new RDD by applying a function to each partition of this RDD, while
-#' tracking the index of the original partition.
-#'
-#' @param X The RDD to apply the transformation.
-#' @param FUN the transformation to apply on each partition; takes the 
partition
-#'        index and a list of elements in the particular partition.
-#' @return a new RDD created by the transformation.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10, 5L)
-#' prod <- lapplyPartitionsWithIndex(rdd, function(partIndex, part) {
-#'                                          partIndex * Reduce("+", part) })
-#' collect(prod, flatten = FALSE) # 0, 7, 22, 45, 76
-#'}
-#' @rdname lapplyPartitionsWithIndex
-#' @aliases lapplyPartitionsWithIndex,RDD,function-method
+# Return a new RDD by applying a function to each partition of this RDD, while
+# tracking the index of the original partition.
+#
+# @param X The RDD to apply the transformation.
+# @param FUN the transformation to apply on each partition; takes the partition
+#        index and a list of elements in the particular partition.
+# @return a new RDD created by the transformation.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10, 5L)
+# prod <- lapplyPartitionsWithIndex(rdd, function(partIndex, part) {
+#                                          partIndex * Reduce("+", part) })
+# collect(prod, flatten = FALSE) # 0, 7, 22, 45, 76
+#}
+# @rdname lapplyPartitionsWithIndex
+# @aliases lapplyPartitionsWithIndex,RDD,function-method
 setMethod("lapplyPartitionsWithIndex",
           signature(X = "RDD", FUN = "function"),
           function(X, FUN) {
             PipelinedRDD(X, FUN)
           })
 
-#' @rdname lapplyPartitionsWithIndex
-#' @aliases mapPartitionsWithIndex,RDD,function-method
+# @rdname lapplyPartitionsWithIndex
+# @aliases mapPartitionsWithIndex,RDD,function-method
 setMethod("mapPartitionsWithIndex",
           signature(X = "RDD", FUN = "function"),
           function(X, FUN) {
             lapplyPartitionsWithIndex(X, FUN)
           })
 
-#' This function returns a new RDD containing only the elements that satisfy
-#' a predicate (i.e. returning TRUE in a given logical function).
-#' The same as `filter()' in Spark.
-#'
-#' @param x The RDD to be filtered.
-#' @param f A unary predicate function.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' unlist(collect(filterRDD(rdd, function (x) { x < 3 }))) # c(1, 2)
-#'}
-#' @rdname filterRDD
-#' @aliases filterRDD,RDD,function-method
+# This function returns a new RDD containing only the elements that satisfy
+# a predicate (i.e. returning TRUE in a given logical function).
+# The same as `filter()' in Spark.
+#
+# @param x The RDD to be filtered.
+# @param f A unary predicate function.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# unlist(collect(filterRDD(rdd, function (x) { x < 3 }))) # c(1, 2)
+#}
+# @rdname filterRDD
+# @aliases filterRDD,RDD,function-method
 setMethod("filterRDD",
           signature(x = "RDD", f = "function"),
           function(x, f) {
@@ -586,30 +586,30 @@ setMethod("filterRDD",
             lapplyPartition(x, filter.func)
           })
 
-#' @rdname filterRDD
-#' @aliases Filter
+# @rdname filterRDD
+# @aliases Filter
 setMethod("Filter",
           signature(f = "function", x = "RDD"),
           function(f, x) {
             filterRDD(x, f)
           })
 
-#' Reduce across elements of an RDD.
-#'
-#' This function reduces the elements of this RDD using the
-#' specified commutative and associative binary operator.
-#'
-#' @param x The RDD to reduce
-#' @param func Commutative and associative function to apply on elements
-#'             of the RDD.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' reduce(rdd, "+") # 55
-#'}
-#' @rdname reduce
-#' @aliases reduce,RDD,ANY-method
+# Reduce across elements of an RDD.
+#
+# This function reduces the elements of this RDD using the
+# specified commutative and associative binary operator.
+#
+# @param x The RDD to reduce
+# @param func Commutative and associative function to apply on elements
+#             of the RDD.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# reduce(rdd, "+") # 55
+#}
+# @rdname reduce
+# @aliases reduce,RDD,ANY-method
 setMethod("reduce",
           signature(x = "RDD", func = "ANY"),
           function(x, func) {
@@ -623,70 +623,70 @@ setMethod("reduce",
             Reduce(func, partitionList)
           })
 
-#' Get the maximum element of an RDD.
-#'
-#' @param x The RDD to get the maximum element from
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' maximum(rdd) # 10
-#'}
-#' @rdname maximum
-#' @aliases maximum,RDD
+# Get the maximum element of an RDD.
+#
+# @param x The RDD to get the maximum element from
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# maximum(rdd) # 10
+#}
+# @rdname maximum
+# @aliases maximum,RDD
 setMethod("maximum",
           signature(x = "RDD"),
           function(x) {
             reduce(x, max)
           })
 
-#' Get the minimum element of an RDD.
-#'
-#' @param x The RDD to get the minimum element from
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' minimum(rdd) # 1
-#'}
-#' @rdname minimum
-#' @aliases minimum,RDD
+# Get the minimum element of an RDD.
+#
+# @param x The RDD to get the minimum element from
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# minimum(rdd) # 1
+#}
+# @rdname minimum
+# @aliases minimum,RDD
 setMethod("minimum",
           signature(x = "RDD"),
           function(x) {
             reduce(x, min)
           })
 
-#' Add up the elements in an RDD.
-#'
-#' @param x The RDD to add up the elements in
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' sumRDD(rdd) # 55
-#'}
-#' @rdname sumRDD 
-#' @aliases sumRDD,RDD
+# Add up the elements in an RDD.
+#
+# @param x The RDD to add up the elements in
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# sumRDD(rdd) # 55
+#}
+# @rdname sumRDD 
+# @aliases sumRDD,RDD
 setMethod("sumRDD",
           signature(x = "RDD"),
           function(x) {
             reduce(x, "+")
           })
 
-#' Applies a function to all elements in an RDD, and force evaluation.
-#'
-#' @param x The RDD to apply the function
-#' @param func The function to be applied.
-#' @return invisible NULL.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' foreach(rdd, function(x) { save(x, file=...) })
-#'}
-#' @rdname foreach
-#' @aliases foreach,RDD,function-method
+# Applies a function to all elements in an RDD, and force evaluation.
+#
+# @param x The RDD to apply the function
+# @param func The function to be applied.
+# @return invisible NULL.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# foreach(rdd, function(x) { save(x, file=...) })
+#}
+# @rdname foreach
+# @aliases foreach,RDD,function-method
 setMethod("foreach",
           signature(x = "RDD", func = "function"),
           function(x, func) {
@@ -697,37 +697,37 @@ setMethod("foreach",
             invisible(collect(mapPartitions(x, partition.func)))
           })
 
-#' Applies a function to each partition in an RDD, and force evaluation.
-#'
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' foreachPartition(rdd, function(part) { save(part, file=...); NULL })
-#'}
-#' @rdname foreach
-#' @aliases foreachPartition,RDD,function-method
+# Applies a function to each partition in an RDD, and force evaluation.
+#
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# foreachPartition(rdd, function(part) { save(part, file=...); NULL })
+#}
+# @rdname foreach
+# @aliases foreachPartition,RDD,function-method
 setMethod("foreachPartition",
           signature(x = "RDD", func = "function"),
           function(x, func) {
             invisible(collect(mapPartitions(x, func)))
           })
 
-#' Take elements from an RDD.
-#'
-#' This function takes the first NUM elements in the RDD and
-#' returns them in a list.
-#'
-#' @param x The RDD to take elements from
-#' @param num Number of elements to take
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' take(rdd, 2L) # list(1, 2)
-#'}
-#' @rdname take
-#' @aliases take,RDD,numeric-method
+# Take elements from an RDD.
+#
+# This function takes the first NUM elements in the RDD and
+# returns them in a list.
+#
+# @param x The RDD to take elements from
+# @param num Number of elements to take
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# take(rdd, 2L) # list(1, 2)
+#}
+# @rdname take
+# @aliases take,RDD,numeric-method
 setMethod("take",
           signature(x = "RDD", num = "numeric"),
           function(x, num) {
@@ -762,39 +762,39 @@ setMethod("take",
           })
 
 
-#' First
-#'
-#' Return the first element of an RDD
-#'
-#' @rdname first
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' first(rdd)
-#' }
+# First
+#
+# Return the first element of an RDD
+#
+# @rdname first
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# first(rdd)
+# }
 setMethod("first",
           signature(x = "RDD"),
           function(x) {
             take(x, 1)[[1]]
           })
 
-#' Removes the duplicates from RDD.
-#'
-#' This function returns a new RDD containing the distinct elements in the
-#' given RDD. The same as `distinct()' in Spark.
-#'
-#' @param x The RDD to remove duplicates from.
-#' @param numPartitions Number of partitions to create.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, c(1,2,2,3,3,3))
-#' sort(unlist(collect(distinct(rdd)))) # c(1, 2, 3)
-#'}
-#' @rdname distinct
-#' @aliases distinct,RDD-method
+# Removes the duplicates from RDD.
+#
+# This function returns a new RDD containing the distinct elements in the
+# given RDD. The same as `distinct()' in Spark.
+#
+# @param x The RDD to remove duplicates from.
+# @param numPartitions Number of partitions to create.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, c(1,2,2,3,3,3))
+# sort(unlist(collect(distinct(rdd)))) # c(1, 2, 3)
+#}
+# @rdname distinct
+# @aliases distinct,RDD-method
 setMethod("distinct",
           signature(x = "RDD"),
           function(x, numPartitions = SparkR:::numPartitions(x)) {
@@ -806,24 +806,24 @@ setMethod("distinct",
             resRDD
           })
 
-#' Return an RDD that is a sampled subset of the given RDD.
-#'
-#' The same as `sample()' in Spark. (We rename it due to signature
-#' inconsistencies with the `sample()' function in R's base package.)
-#'
-#' @param x The RDD to sample elements from
-#' @param withReplacement Sampling with replacement or not
-#' @param fraction The (rough) sample target fraction
-#' @param seed Randomness seed value
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' collect(sampleRDD(rdd, FALSE, 0.5, 1618L)) # ~5 distinct elements
-#' collect(sampleRDD(rdd, TRUE, 0.5, 9L)) # ~5 elements possibly with duplicates
-#'}
-#' @rdname sampleRDD
-#' @aliases sampleRDD,RDD
+# Return an RDD that is a sampled subset of the given RDD.
+#
+# The same as `sample()' in Spark. (We rename it due to signature
+# inconsistencies with the `sample()' function in R's base package.)
+#
+# @param x The RDD to sample elements from
+# @param withReplacement Sampling with replacement or not
+# @param fraction The (rough) sample target fraction
+# @param seed Randomness seed value
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# collect(sampleRDD(rdd, FALSE, 0.5, 1618L)) # ~5 distinct elements
+# collect(sampleRDD(rdd, TRUE, 0.5, 9L)) # ~5 elements possibly with duplicates
+#}
+# @rdname sampleRDD
+# @aliases sampleRDD,RDD
 setMethod("sampleRDD",
           signature(x = "RDD", withReplacement = "logical",
                     fraction = "numeric", seed = "integer"),
@@ -867,23 +867,23 @@ setMethod("sampleRDD",
             lapplyPartitionsWithIndex(x, samplingFunc)
           })
 
-#' Return a list of the elements that are a sampled subset of the given RDD.
-#'
-#' @param x The RDD to sample elements from
-#' @param withReplacement Sampling with replacement or not
-#' @param num Number of elements to return
-#' @param seed Randomness seed value
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:100)
-#' # exactly 5 elements sampled, which may not be distinct
-#' takeSample(rdd, TRUE, 5L, 1618L)
-#' # exactly 5 distinct elements sampled
-#' takeSample(rdd, FALSE, 5L, 16181618L)
-#'}
-#' @rdname takeSample
-#' @aliases takeSample,RDD
+# Return a list of the elements that are a sampled subset of the given RDD.
+#
+# @param x The RDD to sample elements from
+# @param withReplacement Sampling with replacement or not
+# @param num Number of elements to return
+# @param seed Randomness seed value
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:100)
+# # exactly 5 elements sampled, which may not be distinct
+# takeSample(rdd, TRUE, 5L, 1618L)
+# # exactly 5 distinct elements sampled
+# takeSample(rdd, FALSE, 5L, 16181618L)
+#}
+# @rdname takeSample
+# @aliases takeSample,RDD
 setMethod("takeSample", signature(x = "RDD", withReplacement = "logical",
                                   num = "integer", seed = "integer"),
           function(x, withReplacement, num, seed) {
@@ -930,18 +930,18 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical",
             sample(samples)[1:total]
           })
 
-#' Creates tuples of the elements in this RDD by applying a function.
-#'
-#' @param x The RDD.
-#' @param func The function to be applied.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(1, 2, 3))
-#' collect(keyBy(rdd, function(x) { x*x })) # list(list(1, 1), list(4, 2), list(9, 3))
-#'}
-#' @rdname keyBy
-#' @aliases keyBy,RDD
+# Creates tuples of the elements in this RDD by applying a function.
+#
+# @param x The RDD.
+# @param func The function to be applied.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(1, 2, 3))
+# collect(keyBy(rdd, function(x) { x*x })) # list(list(1, 1), list(4, 2), list(9, 3))
+#}
+# @rdname keyBy
+# @aliases keyBy,RDD
 setMethod("keyBy",
           signature(x = "RDD", func = "function"),
           function(x, func) {
@@ -951,44 +951,44 @@ setMethod("keyBy",
             lapply(x, apply.func)
           })
 
-#' Return a new RDD that has exactly numPartitions partitions.
-#' Can increase or decrease the level of parallelism in this RDD. Internally,
-#' this uses a shuffle to redistribute data.
-#' If you are decreasing the number of partitions in this RDD, consider using
-#' coalesce, which can avoid performing a shuffle.
-#'
-#' @param x The RDD.
-#' @param numPartitions Number of partitions to create.
-#' @seealso coalesce
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(1, 2, 3, 4, 5, 6, 7), 4L)
-#' numPartitions(rdd)                   # 4
-#' numPartitions(repartition(rdd, 2L))  # 2
-#'}
-#' @rdname repartition
-#' @aliases repartition,RDD
+# Return a new RDD that has exactly numPartitions partitions.
+# Can increase or decrease the level of parallelism in this RDD. Internally,
+# this uses a shuffle to redistribute data.
+# If you are decreasing the number of partitions in this RDD, consider using
+# coalesce, which can avoid performing a shuffle.
+#
+# @param x The RDD.
+# @param numPartitions Number of partitions to create.
+# @seealso coalesce
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(1, 2, 3, 4, 5, 6, 7), 4L)
+# numPartitions(rdd)                   # 4
+# numPartitions(repartition(rdd, 2L))  # 2
+#}
+# @rdname repartition
+# @aliases repartition,RDD
 setMethod("repartition",
           signature(x = "RDD", numPartitions = "numeric"),
           function(x, numPartitions) {
             coalesce(x, numPartitions, TRUE)
           })
 
-#' Return a new RDD that is reduced into numPartitions partitions.
-#'
-#' @param x The RDD.
-#' @param numPartitions Number of partitions to create.
-#' @seealso repartition
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(1, 2, 3, 4, 5), 3L)
-#' numPartitions(rdd)               # 3
-#' numPartitions(coalesce(rdd, 1L)) # 1
-#'}
-#' @rdname coalesce
-#' @aliases coalesce,RDD
+# Return a new RDD that is reduced into numPartitions partitions.
+#
+# @param x The RDD.
+# @param numPartitions Number of partitions to create.
+# @seealso repartition
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(1, 2, 3, 4, 5), 3L)
+# numPartitions(rdd)               # 3
+# numPartitions(coalesce(rdd, 1L)) # 1
+#}
+# @rdname coalesce
+# @aliases coalesce,RDD
 setMethod("coalesce",
            signature(x = "RDD", numPartitions = "numeric"),
            function(x, numPartitions, shuffle = FALSE) {
@@ -1012,19 +1012,19 @@ setMethod("coalesce",
              }
            })
 
-#' Save this RDD as a SequenceFile of serialized objects.
-#'
-#' @param x The RDD to save
-#' @param path The directory where the file is saved
-#' @seealso objectFile
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:3)
-#' saveAsObjectFile(rdd, "/tmp/sparkR-tmp")
-#'}
-#' @rdname saveAsObjectFile
-#' @aliases saveAsObjectFile,RDD
+# Save this RDD as a SequenceFile of serialized objects.
+#
+# @param x The RDD to save
+# @param path The directory where the file is saved
+# @seealso objectFile
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:3)
+# saveAsObjectFile(rdd, "/tmp/sparkR-tmp")
+#}
+# @rdname saveAsObjectFile
+# @aliases saveAsObjectFile,RDD
 setMethod("saveAsObjectFile",
           signature(x = "RDD", path = "character"),
           function(x, path) {
@@ -1037,18 +1037,18 @@ setMethod("saveAsObjectFile",
             invisible(callJMethod(getJRDD(x), "saveAsObjectFile", path))
           })
 
-#' Save this RDD as a text file, using string representations of elements.
-#'
-#' @param x The RDD to save
-#' @param path The directory where the partitions of the text file are saved
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:3)
-#' saveAsTextFile(rdd, "/tmp/sparkR-tmp")
-#'}
-#' @rdname saveAsTextFile
-#' @aliases saveAsTextFile,RDD
+# Save this RDD as a text file, using string representations of elements.
+#
+# @param x The RDD to save
+# @param path The directory where the partitions of the text file are saved
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:3)
+# saveAsTextFile(rdd, "/tmp/sparkR-tmp")
+#}
+# @rdname saveAsTextFile
+# @aliases saveAsTextFile,RDD
 setMethod("saveAsTextFile",
           signature(x = "RDD", path = "character"),
           function(x, path) {
@@ -1061,21 +1061,21 @@ setMethod("saveAsTextFile",
               callJMethod(getJRDD(stringRdd, serializedMode = "string"), "saveAsTextFile", path))
           })
 
-#' Sort an RDD by the given key function.
-#'
-#' @param x An RDD to be sorted.
-#' @param func A function used to compute the sort key for each element.
-#' @param ascending A flag to indicate whether the sorting is ascending or descending.
-#' @param numPartitions Number of partitions to create.
-#' @return An RDD where all elements are sorted.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(3, 2, 1))
-#' collect(sortBy(rdd, function(x) { x })) # list (1, 2, 3)
-#'}
-#' @rdname sortBy
-#' @aliases sortBy,RDD,RDD-method
+# Sort an RDD by the given key function.
+#
+# @param x An RDD to be sorted.
+# @param func A function used to compute the sort key for each element.
+# @param ascending A flag to indicate whether the sorting is ascending or descending.
+# @param numPartitions Number of partitions to create.
+# @return An RDD where all elements are sorted.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(3, 2, 1))
+# collect(sortBy(rdd, function(x) { x })) # list (1, 2, 3)
+#}
+# @rdname sortBy
+# @aliases sortBy,RDD,RDD-method
 setMethod("sortBy",
           signature(x = "RDD", func = "function"),
           function(x, func, ascending = TRUE, numPartitions = SparkR:::numPartitions(x)) {
@@ -1137,97 +1137,97 @@ takeOrderedElem <- function(x, num, ascending = TRUE) {
   resList
 }
 
-#' Returns the first N elements from an RDD in ascending order.
-#'
-#' @param x An RDD.
-#' @param num Number of elements to return.
-#' @return The first N elements from the RDD in ascending order.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7))
-#' takeOrdered(rdd, 6L) # list(1, 2, 3, 4, 5, 6)
-#'}
-#' @rdname takeOrdered
-#' @aliases takeOrdered,RDD,RDD-method
+# Returns the first N elements from an RDD in ascending order.
+#
+# @param x An RDD.
+# @param num Number of elements to return.
+# @return The first N elements from the RDD in ascending order.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7))
+# takeOrdered(rdd, 6L) # list(1, 2, 3, 4, 5, 6)
+#}
+# @rdname takeOrdered
+# @aliases takeOrdered,RDD,RDD-method
 setMethod("takeOrdered",
           signature(x = "RDD", num = "integer"),
           function(x, num) {          
             takeOrderedElem(x, num)
           })
 
-#' Returns the top N elements from an RDD.
-#'
-#' @param x An RDD.
-#' @param num Number of elements to return.
-#' @return The top N elements from the RDD.
-#' @rdname top
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7))
-#' top(rdd, 6L) # list(10, 9, 7, 6, 5, 4)
-#'}
-#' @rdname top
-#' @aliases top,RDD,RDD-method
+# Returns the top N elements from an RDD.
+#
+# @param x An RDD.
+# @param num Number of elements to return.
+# @return The top N elements from the RDD.
+# @rdname top
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7))
+# top(rdd, 6L) # list(10, 9, 7, 6, 5, 4)
+#}
+# @rdname top
+# @aliases top,RDD,RDD-method
 setMethod("top",
           signature(x = "RDD", num = "integer"),
           function(x, num) {          
             takeOrderedElem(x, num, FALSE)
           })
 
-#' Fold an RDD using a given associative function and a neutral "zero value".
-#'
-#' Aggregate the elements of each partition, and then the results for all the
-#' partitions, using a given associative function and a neutral "zero value".
-#' 
-#' @param x An RDD.
-#' @param zeroValue A neutral "zero value".
-#' @param op An associative function for the folding operation.
-#' @return The folding result.
-#' @rdname fold
-#' @seealso reduce
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(1, 2, 3, 4, 5))
-#' fold(rdd, 0, "+") # 15
-#'}
-#' @rdname fold
-#' @aliases fold,RDD,RDD-method
+# Fold an RDD using a given associative function and a neutral "zero value".
+#
+# Aggregate the elements of each partition, and then the results for all the
+# partitions, using a given associative function and a neutral "zero value".
+# 
+# @param x An RDD.
+# @param zeroValue A neutral "zero value".
+# @param op An associative function for the folding operation.
+# @return The folding result.
+# @rdname fold
+# @seealso reduce
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(1, 2, 3, 4, 5))
+# fold(rdd, 0, "+") # 15
+#}
+# @rdname fold
+# @aliases fold,RDD,RDD-method
 setMethod("fold",
           signature(x = "RDD", zeroValue = "ANY", op = "ANY"),
           function(x, zeroValue, op) {
             aggregateRDD(x, zeroValue, op, op)
           })
 
-#' Aggregate an RDD using the given combine functions and a neutral "zero value".
-#'
-#' Aggregate the elements of each partition, and then the results for all the
-#' partitions, using given combine functions and a neutral "zero value".
-#' 
-#' @param x An RDD.
-#' @param zeroValue A neutral "zero value".
-#' @param seqOp A function to aggregate the RDD elements. It may return a different
-#'              result type from the type of the RDD elements.
-#' @param combOp A function to aggregate results of seqOp.
-#' @return The aggregation result.
-#' @rdname aggregateRDD
-#' @seealso reduce
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(1, 2, 3, 4))
-#' zeroValue <- list(0, 0)
-#' seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) }
-#' combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) }
-#' aggregateRDD(rdd, zeroValue, seqOp, combOp) # list(10, 4)
-#'}
-#' @rdname aggregateRDD
-#' @aliases aggregateRDD,RDD,RDD-method
+# Aggregate an RDD using the given combine functions and a neutral "zero value".
+#
+# Aggregate the elements of each partition, and then the results for all the
+# partitions, using given combine functions and a neutral "zero value".
+# 
+# @param x An RDD.
+# @param zeroValue A neutral "zero value".
+# @param seqOp A function to aggregate the RDD elements. It may return a different
+#              result type from the type of the RDD elements.
+# @param combOp A function to aggregate results of seqOp.
+# @return The aggregation result.
+# @rdname aggregateRDD
+# @seealso reduce
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(1, 2, 3, 4))
+# zeroValue <- list(0, 0)
+# seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) }
+# combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) }
+# aggregateRDD(rdd, zeroValue, seqOp, combOp) # list(10, 4)
+#}
+# @rdname aggregateRDD
+# @aliases aggregateRDD,RDD,RDD-method
 setMethod("aggregateRDD",
           signature(x = "RDD", zeroValue = "ANY", seqOp = "ANY", combOp = "ANY"),
           function(x, zeroValue, seqOp, combOp) {        
@@ -1240,25 +1240,25 @@ setMethod("aggregateRDD",
             Reduce(combOp, partitionList, zeroValue)
           })
 
-#' Pipes elements to a forked external process.
-#'
-#' The same as 'pipe()' in Spark.
-#'
-#' @param x The RDD whose elements are piped to the forked external process.
-#' @param command The command to fork an external process.
-#' @param env A named list to set environment variables of the external process.
-#' @return A new RDD created by piping all elements to a forked external process.
-#' @rdname pipeRDD
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10)
-#' collect(pipeRDD(rdd, "more")
-#' Output: c("1", "2", ..., "10")
-#'}
-#' @rdname pipeRDD
-#' @aliases pipeRDD,RDD,character-method
+# Pipes elements to a forked external process.
+#
+# The same as 'pipe()' in Spark.
+#
+# @param x The RDD whose elements are piped to the forked external process.
+# @param command The command to fork an external process.
+# @param env A named list to set environment variables of the external process.
+# @return A new RDD created by piping all elements to a forked external process.
+# @rdname pipeRDD
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10)
+# collect(pipeRDD(rdd, "more"))
+# Output: c("1", "2", ..., "10")
+#}
+# @rdname pipeRDD
+# @aliases pipeRDD,RDD,character-method
 setMethod("pipeRDD",
           signature(x = "RDD", command = "character"),
           function(x, command, env = list()) {
@@ -1274,41 +1274,41 @@ setMethod("pipeRDD",
           })
 
 # TODO: Consider caching the name in the RDD's environment
-#' Return an RDD's name.
-#'
-#' @param x The RDD whose name is returned.
-#' @rdname name
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(1,2,3))
-#' name(rdd) # NULL (if not set before)
-#'}
-#' @rdname name
-#' @aliases name,RDD
+# Return an RDD's name.
+#
+# @param x The RDD whose name is returned.
+# @rdname name
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(1,2,3))
+# name(rdd) # NULL (if not set before)
+#}
+# @rdname name
+# @aliases name,RDD
 setMethod("name",
           signature(x = "RDD"),
           function(x) {
             callJMethod(getJRDD(x), "name")
           })
 
-#' Set an RDD's name.
-#'
-#' @param x The RDD whose name is to be set.
-#' @param name The RDD name to be set.
-#' @return a new RDD renamed.
-#' @rdname setName
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list(1,2,3))
-#' setName(rdd, "myRDD")
-#' name(rdd) # "myRDD"
-#'}
-#' @rdname setName
-#' @aliases setName,RDD
+# Set an RDD's name.
+#
+# @param x The RDD whose name is to be set.
+# @param name The RDD name to be set.
+# @return a new RDD renamed.
+# @rdname setName
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list(1,2,3))
+# setName(rdd, "myRDD")
+# name(rdd) # "myRDD"
+#}
+# @rdname setName
+# @aliases setName,RDD
 setMethod("setName",
           signature(x = "RDD", name = "character"),
           function(x, name) {
@@ -1316,25 +1316,25 @@ setMethod("setName",
             x
           })
 
-#' Zip an RDD with generated unique Long IDs.
-#'
-#' Items in the kth partition will get ids k, n+k, 2*n+k, ..., where
-#' n is the number of partitions. So there may exist gaps, but this
-#' method won't trigger a spark job, which is different from
-#' zipWithIndex.
-#'
-#' @param x An RDD to be zipped.
-#' @return An RDD with zipped items.
-#' @seealso zipWithIndex
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L)
-#' collect(zipWithUniqueId(rdd)) 
-#' # list(list("a", 0), list("b", 3), list("c", 1), list("d", 4), list("e", 2))
-#'}
-#' @rdname zipWithUniqueId
-#' @aliases zipWithUniqueId,RDD
+# Zip an RDD with generated unique Long IDs.
+#
+# Items in the kth partition will get ids k, n+k, 2*n+k, ..., where
+# n is the number of partitions. So there may exist gaps, but this
+# method won't trigger a spark job, which is different from
+# zipWithIndex.
+#
+# @param x An RDD to be zipped.
+# @return An RDD with zipped items.
+# @seealso zipWithIndex
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L)
+# collect(zipWithUniqueId(rdd)) 
+# # list(list("a", 0), list("b", 3), list("c", 1), list("d", 4), list("e", 2))
+#}
+# @rdname zipWithUniqueId
+# @aliases zipWithUniqueId,RDD
 setMethod("zipWithUniqueId",
           signature(x = "RDD"),
           function(x) {
@@ -1353,28 +1353,28 @@ setMethod("zipWithUniqueId",
             lapplyPartitionsWithIndex(x, partitionFunc)
           })
 
-#' Zip an RDD with its element indices.
-#'
-#' The ordering is first based on the partition index and then the
-#' ordering of items within each partition. So the first item in
-#' the first partition gets index 0, and the last item in the last
-#' partition receives the largest index.
-#'
-#' This method needs to trigger a Spark job when this RDD contains
-#' more than one partition.
-#'
-#' @param x An RDD to be zipped.
-#' @return An RDD with zipped items.
-#' @seealso zipWithUniqueId
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L)
-#' collect(zipWithIndex(rdd))
-#' # list(list("a", 0), list("b", 1), list("c", 2), list("d", 3), list("e", 4))
-#'}
-#' @rdname zipWithIndex
-#' @aliases zipWithIndex,RDD
+# Zip an RDD with its element indices.
+#
+# The ordering is first based on the partition index and then the
+# ordering of items within each partition. So the first item in
+# the first partition gets index 0, and the last item in the last
+# partition receives the largest index.
+#
+# This method needs to trigger a Spark job when this RDD contains
+# more than one partition.
+#
+# @param x An RDD to be zipped.
+# @return An RDD with zipped items.
+# @seealso zipWithUniqueId
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L)
+# collect(zipWithIndex(rdd))
+# # list(list("a", 0), list("b", 1), list("c", 2), list("d", 3), list("e", 4))
+#}
+# @rdname zipWithIndex
+# @aliases zipWithIndex,RDD
 setMethod("zipWithIndex",
           signature(x = "RDD"),
           function(x) {
@@ -1406,20 +1406,20 @@ setMethod("zipWithIndex",
            lapplyPartitionsWithIndex(x, partitionFunc)
          })
 
-#' Coalesce all elements within each partition of an RDD into a list.
-#'
-#' @param x An RDD.
-#' @return An RDD created by coalescing all elements within
-#'         each partition into a list.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, as.list(1:4), 2L)
-#' collect(glom(rdd))
-#' # list(list(1, 2), list(3, 4))
-#'}
-#' @rdname glom
-#' @aliases glom,RDD
+# Coalesce all elements within each partition of an RDD into a list.
+#
+# @param x An RDD.
+# @return An RDD created by coalescing all elements within
+#         each partition into a list.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, as.list(1:4), 2L)
+# collect(glom(rdd))
+# # list(list(1, 2), list(3, 4))
+#}
+# @rdname glom
+# @aliases glom,RDD
 setMethod("glom",
           signature(x = "RDD"),
           function(x) {
@@ -1432,21 +1432,21 @@ setMethod("glom",
 
 ############ Binary Functions #############
 
-#' Return the union RDD of two RDDs.
-#' The same as union() in Spark.
-#'
-#' @param x An RDD.
-#' @param y An RDD.
-#' @return a new RDD created by performing the simple union (witout removing
-#' duplicates) of two input RDDs.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:3)
-#' unionRDD(rdd, rdd) # 1, 2, 3, 1, 2, 3
-#'}
-#' @rdname unionRDD
-#' @aliases unionRDD,RDD,RDD-method
+# Return the union RDD of two RDDs.
+# The same as union() in Spark.
+#
+# @param x An RDD.
+# @param y An RDD.
+# @return a new RDD created by performing the simple union (without removing
+# duplicates) of two input RDDs.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:3)
+# unionRDD(rdd, rdd) # 1, 2, 3, 1, 2, 3
+#}
+# @rdname unionRDD
+# @aliases unionRDD,RDD,RDD-method
 setMethod("unionRDD",
           signature(x = "RDD", y = "RDD"),
           function(x, y) {
@@ -1463,27 +1463,27 @@ setMethod("unionRDD",
             union.rdd
           })
 
-#' Zip an RDD with another RDD.
-#'
-#' Zips this RDD with another one, returning key-value pairs with the
-#' first element in each RDD second element in each RDD, etc. Assumes
-#' that the two RDDs have the same number of partitions and the same
-#' number of elements in each partition (e.g. one was made through
-#' a map on the other).
-#'
-#' @param x An RDD to be zipped.
-#' @param other Another RDD to be zipped.
-#' @return An RDD zipped from the two RDDs.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd1 <- parallelize(sc, 0:4)
-#' rdd2 <- parallelize(sc, 1000:1004)
-#' collect(zipRDD(rdd1, rdd2))
-#' # list(list(0, 1000), list(1, 1001), list(2, 1002), list(3, 1003), list(4, 1004))
-#'}
-#' @rdname zipRDD
-#' @aliases zipRDD,RDD
+# Zip an RDD with another RDD.
+#
+# Zips this RDD with another one, returning key-value pairs with the
+# first element in each RDD, second element in each RDD, etc. Assumes
+# that the two RDDs have the same number of partitions and the same
+# number of elements in each partition (e.g. one was made through
+# a map on the other).
+#
+# @param x An RDD to be zipped.
+# @param other Another RDD to be zipped.
+# @return An RDD zipped from the two RDDs.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd1 <- parallelize(sc, 0:4)
+# rdd2 <- parallelize(sc, 1000:1004)
+# collect(zipRDD(rdd1, rdd2))
+# # list(list(0, 1000), list(1, 1001), list(2, 1002), list(3, 1003), list(4, 1004))
+#}
+# @rdname zipRDD
+# @aliases zipRDD,RDD
 setMethod("zipRDD",
           signature(x = "RDD", other = "RDD"),
           function(x, other) {
@@ -1502,24 +1502,24 @@ setMethod("zipRDD",
             mergePartitions(rdd, TRUE)
           })
 
-#' Cartesian product of this RDD and another one.
-#'
-#' Return the Cartesian product of this RDD and another one, 
-#' that is, the RDD of all pairs of elements (a, b) where a 
-#' is in this and b is in other.
-#' 
-#' @param x An RDD.
-#' @param other An RDD.
-#' @return A new RDD which is the Cartesian product of these two RDDs.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:2)
-#' sortByKey(cartesian(rdd, rdd)) 
-#' # list(list(1, 1), list(1, 2), list(2, 1), list(2, 2))
-#'}
-#' @rdname cartesian
-#' @aliases cartesian,RDD,RDD-method
+# Cartesian product of this RDD and another one.
+#
+# Return the Cartesian product of this RDD and another one, 
+# that is, the RDD of all pairs of elements (a, b) where a 
+# is in this and b is in other.
+# 
+# @param x An RDD.
+# @param other An RDD.
+# @return A new RDD which is the Cartesian product of these two RDDs.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:2)
+# sortByKey(cartesian(rdd, rdd)) 
+# # list(list(1, 1), list(1, 2), list(2, 1), list(2, 2))
+#}
+# @rdname cartesian
+# @aliases cartesian,RDD,RDD-method
 setMethod("cartesian",
           signature(x = "RDD", other = "RDD"),
           function(x, other) {
@@ -1532,24 +1532,24 @@ setMethod("cartesian",
             mergePartitions(rdd, FALSE)
           })
 
-#' Subtract an RDD with another RDD.
-#'
-#' Return an RDD with the elements from this that are not in other.
-#'
-#' @param x An RDD.
-#' @param other An RDD.
-#' @param numPartitions Number of the partitions in the result RDD.
-#' @return An RDD with the elements from this that are not in other.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd1 <- parallelize(sc, list(1, 1, 2, 2, 3, 4))
-#' rdd2 <- parallelize(sc, list(2, 4))
-#' collect(subtract(rdd1, rdd2))
-#' # list(1, 1, 3)
-#'}
-#' @rdname subtract
-#' @aliases subtract,RDD
+# Subtract an RDD with another RDD.
+#
+# Return an RDD with the elements from this that are not in other.
+#
+# @param x An RDD.
+# @param other An RDD.
+# @param numPartitions Number of the partitions in the result RDD.
+# @return An RDD with the elements from this that are not in other.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd1 <- parallelize(sc, list(1, 1, 2, 2, 3, 4))
+# rdd2 <- parallelize(sc, list(2, 4))
+# collect(subtract(rdd1, rdd2))
+# # list(1, 1, 3)
+#}
+# @rdname subtract
+# @aliases subtract,RDD
 setMethod("subtract",
           signature(x = "RDD", other = "RDD"),
           function(x, other, numPartitions = SparkR:::numPartitions(x)) {
@@ -1559,28 +1559,28 @@ setMethod("subtract",
             keys(subtractByKey(rdd1, rdd2, numPartitions))
           })
 
-#' Intersection of this RDD and another one.
-#'
-#' Return the intersection of this RDD and another one.
-#' The output will not contain any duplicate elements,
-#' even if the input RDDs did. Performs a hash partition
-#' across the cluster.
-#' Note that this method performs a shuffle internally.
-#'
-#' @param x An RDD.
-#' @param other An RDD.
-#' @param numPartitions The number of partitions in the result RDD.
-#' @return An RDD which is the intersection of these two RDDs.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd1 <- parallelize(sc, list(1, 10, 2, 3, 4, 5))
-#' rdd2 <- parallelize(sc, list(1, 6, 2, 3, 7, 8))
-#' collect(sortBy(intersection(rdd1, rdd2), function(x) { x }))
-#' # list(1, 2, 3)
-#'}
-#' @rdname intersection
-#' @aliases intersection,RDD
+# Intersection of this RDD and another one.
+#
+# Return the intersection of this RDD and another one.
+# The output will not contain any duplicate elements,
+# even if the input RDDs did. Performs a hash partition
+# across the cluster.
+# Note that this method performs a shuffle internally.
+#
+# @param x An RDD.
+# @param other An RDD.
+# @param numPartitions The number of partitions in the result RDD.
+# @return An RDD which is the intersection of these two RDDs.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd1 <- parallelize(sc, list(1, 10, 2, 3, 4, 5))
+# rdd2 <- parallelize(sc, list(1, 6, 2, 3, 7, 8))
+# collect(sortBy(intersection(rdd1, rdd2), function(x) { x }))
+# # list(1, 2, 3)
+#}
+# @rdname intersection
+# @aliases intersection,RDD
 setMethod("intersection",
           signature(x = "RDD", other = "RDD"),
           function(x, other, numPartitions = SparkR:::numPartitions(x)) {
@@ -1596,26 +1596,26 @@ setMethod("intersection",
             keys(filterRDD(cogroup(rdd1, rdd2, numPartitions = numPartitions), filterFunction))
           })
 
-#' Zips an RDD's partitions with one (or more) RDD(s).
-#' Same as zipPartitions in Spark.
-#' 
-#' @param ... RDDs to be zipped.
-#' @param func A function to transform zipped partitions.
-#' @return A new RDD by applying a function to the zipped partitions. 
-#'         Assumes that all the RDDs have the *same number of partitions*, but 
-#'         does *not* require them to have the same number of elements in each partition.
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd1 <- parallelize(sc, 1:2, 2L)  # 1, 2
-#' rdd2 <- parallelize(sc, 1:4, 2L)  # 1:2, 3:4
-#' rdd3 <- parallelize(sc, 1:6, 2L)  # 1:3, 4:6
-#' collect(zipPartitions(rdd1, rdd2, rdd3, 
-#'                       func = function(x, y, z) { list(list(x, y, z))} ))
-#' # list(list(1, c(1,2), c(1,2,3)), list(2, c(3,4), c(4,5,6)))
-#'}
-#' @rdname zipRDD
-#' @aliases zipPartitions,RDD
+# Zips an RDD's partitions with one (or more) RDD(s).
+# Same as zipPartitions in Spark.
+# 
+# @param ... RDDs to be zipped.
+# @param func A function to transform zipped partitions.
+# @return A new RDD by applying a function to the zipped partitions. 
+#         Assumes that all the RDDs have the *same number of partitions*, but 
+#         does *not* require them to have the same number of elements in each partition.
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd1 <- parallelize(sc, 1:2, 2L)  # 1, 2
+# rdd2 <- parallelize(sc, 1:4, 2L)  # 1:2, 3:4
+# rdd3 <- parallelize(sc, 1:6, 2L)  # 1:3, 4:6
+# collect(zipPartitions(rdd1, rdd2, rdd3, 
+#                       func = function(x, y, z) { list(list(x, y, z))} ))
+# # list(list(1, c(1,2), c(1,2,3)), list(2, c(3,4), c(4,5,6)))
+#}
+# @rdname zipRDD
+# @aliases zipPartitions,RDD
 setMethod("zipPartitions",
           "RDD",
           function(..., func) {

http://git-wip-us.apache.org/repos/asf/spark/blob/4f01f5b5/R/pkg/R/SQLContext.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 4f05ba5..cae06e6 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -150,21 +150,21 @@ createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) {
   dataFrame(sdf)
 }
 
-#' toDF
-#'
-#' Converts an RDD to a DataFrame by infer the types.
-#'
-#' @param x An RDD
-#'
-#' @rdname DataFrame
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
-#' df <- toDF(rdd)
-#' }
+# toDF
+#
+# Converts an RDD to a DataFrame by inferring the types.
+#
+# @param x An RDD
+#
+# @rdname DataFrame
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
+# df <- toDF(rdd)
+# }
 
 setGeneric("toDF", function(x, ...) { standardGeneric("toDF") })
 
@@ -207,23 +207,23 @@ jsonFile <- function(sqlCtx, path) {
 }
 
 
-#' JSON RDD
-#'
-#' Loads an RDD storing one JSON object per string as a DataFrame.
-#'
-#' @param sqlCtx SQLContext to use
-#' @param rdd An RDD of JSON string
-#' @param schema A StructType object to use as schema
-#' @param samplingRatio The ratio of simpling used to infer the schema
-#' @return A DataFrame
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' rdd <- texFile(sc, "path/to/json")
-#' df <- jsonRDD(sqlCtx, rdd)
-#' }
+# JSON RDD
+#
+# Loads an RDD storing one JSON object per string as a DataFrame.
+#
+# @param sqlCtx SQLContext to use
+# @param rdd An RDD of JSON string
+# @param schema A StructType object to use as schema
+# @param samplingRatio The ratio of sampling used to infer the schema
+# @return A DataFrame
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# rdd <- textFile(sc, "path/to/json")
+# df <- jsonRDD(sqlCtx, rdd)
+# }
 
 # TODO: support schema
 jsonRDD <- function(sqlCtx, rdd, schema = NULL, samplingRatio = 1.0) {

http://git-wip-us.apache.org/repos/asf/spark/blob/4f01f5b5/R/pkg/R/broadcast.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/broadcast.R b/R/pkg/R/broadcast.R
index 583fa2e..23dc387 100644
--- a/R/pkg/R/broadcast.R
+++ b/R/pkg/R/broadcast.R
@@ -23,21 +23,21 @@
 .broadcastValues <- new.env()
 .broadcastIdToName <- new.env()
 
-#' @title S4 class that represents a Broadcast variable
-#' @description Broadcast variables can be created using the broadcast
-#'              function from a \code{SparkContext}.
-#' @rdname broadcast-class
-#' @seealso broadcast 
-#'
-#' @param id Id of the backing Spark broadcast variable 
-#' @export
+# @title S4 class that represents a Broadcast variable
+# @description Broadcast variables can be created using the broadcast
+#              function from a \code{SparkContext}.
+# @rdname broadcast-class
+# @seealso broadcast 
+#
+# @param id Id of the backing Spark broadcast variable 
+# @export
 setClass("Broadcast", slots = list(id = "character"))
 
-#' @rdname broadcast-class
-#' @param value Value of the broadcast variable
-#' @param jBroadcastRef reference to the backing Java broadcast object
-#' @param objName name of broadcasted object
-#' @export
+# @rdname broadcast-class
+# @param value Value of the broadcast variable
+# @param jBroadcastRef reference to the backing Java broadcast object
+# @param objName name of broadcasted object
+# @export
 Broadcast <- function(id, value, jBroadcastRef, objName) {
   .broadcastValues[[id]] <- value
   .broadcastNames[[as.character(objName)]] <- jBroadcastRef
@@ -45,13 +45,13 @@ Broadcast <- function(id, value, jBroadcastRef, objName) {
   new("Broadcast", id = id)
 }
 
-#' @description
-#' \code{value} can be used to get the value of a broadcast variable inside
-#' a distributed function.
-#'
-#' @param bcast The broadcast variable to get
-#' @rdname broadcast
-#' @aliases value,Broadcast-method
+# @description
+# \code{value} can be used to get the value of a broadcast variable inside
+# a distributed function.
+#
+# @param bcast The broadcast variable to get
+# @rdname broadcast
+# @aliases value,Broadcast-method
 setMethod("value",
           signature(bcast = "Broadcast"),
           function(bcast) {
@@ -62,24 +62,24 @@ setMethod("value",
             }
           })
 
-#' Internal function to set values of a broadcast variable.
-#'
-#' This function is used internally by Spark to set the value of a broadcast
-#' variable on workers. Not intended for use outside the package.
-#'
-#' @rdname broadcast-internal
-#' @seealso broadcast, value 
+# Internal function to set values of a broadcast variable.
+#
+# This function is used internally by Spark to set the value of a broadcast
+# variable on workers. Not intended for use outside the package.
+#
+# @rdname broadcast-internal
+# @seealso broadcast, value 
 
-#' @param bcastId The id of broadcast variable to set
-#' @param value The value to be set
-#' @export
+# @param bcastId The id of broadcast variable to set
+# @param value The value to be set
+# @export
 setBroadcastValue <- function(bcastId, value) {
   bcastIdStr <- as.character(bcastId)
   .broadcastValues[[bcastIdStr]] <- value
 }
 
-#' Helper function to clear the list of broadcast variables we know about
-#' Should be called when the SparkR JVM backend is shutdown
+# Helper function to clear the list of broadcast variables we know about
+# Should be called when the SparkR JVM backend is shutdown
 clearBroadcastVariables <- function() {
   bcasts <- ls(.broadcastNames)
   rm(list = bcasts, envir = .broadcastNames)

http://git-wip-us.apache.org/repos/asf/spark/blob/4f01f5b5/R/pkg/R/context.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R
index b4845b6..43be9c9 100644
--- a/R/pkg/R/context.R
+++ b/R/pkg/R/context.R
@@ -25,27 +25,27 @@ getMinPartitions <- function(sc, minPartitions) {
   as.integer(minPartitions)
 }
 
-#' Create an RDD from a text file.
-#'
-#' This function reads a text file from HDFS, a local file system (available on all
-#' nodes), or any Hadoop-supported file system URI, and creates an
-#' RDD of strings from it.
-#'
-#' @param sc SparkContext to use
-#' @param path Path of file to read. A vector of multiple paths is allowed.
-#' @param minPartitions Minimum number of partitions to be created. If NULL, the default
-#'  value is chosen based on available parallelism.
-#' @return RDD where each item is of type \code{character}
-#' @export
-#' @examples
-#'\dontrun{
-#'  sc <- sparkR.init()
-#'  lines <- textFile(sc, "myfile.txt")
-#'}
+# Create an RDD from a text file.
+#
+# This function reads a text file from HDFS, a local file system (available on all
+# nodes), or any Hadoop-supported file system URI, and creates an
+# RDD of strings from it.
+#
+# @param sc SparkContext to use
+# @param path Path of file to read. A vector of multiple paths is allowed.
+# @param minPartitions Minimum number of partitions to be created. If NULL, the default
+#  value is chosen based on available parallelism.
+# @return RDD where each item is of type \code{character}
+# @export
+# @examples
+#\dontrun{
+#  sc <- sparkR.init()
+#  lines <- textFile(sc, "myfile.txt")
+#}
 textFile <- function(sc, path, minPartitions = NULL) {
   # Allow the user to have a more flexible definiton of the text file path
   path <- suppressWarnings(normalizePath(path))
-  #' Convert a string vector of paths to a string containing comma separated paths
+  # Convert a string vector of paths to a string containing comma separated paths
   path <- paste(path, collapse = ",")
 
   jrdd <- callJMethod(sc, "textFile", path, getMinPartitions(sc, minPartitions))
@@ -53,27 +53,27 @@ textFile <- function(sc, path, minPartitions = NULL) {
   RDD(jrdd, "string")
 }
 
-#' Load an RDD saved as a SequenceFile containing serialized objects.
-#'
-#' The file to be loaded should be one that was previously generated by calling
-#' saveAsObjectFile() of the RDD class.
-#'
-#' @param sc SparkContext to use
-#' @param path Path of file to read. A vector of multiple paths is allowed.
-#' @param minPartitions Minimum number of partitions to be created. If NULL, the default
-#'  value is chosen based on available parallelism.
-#' @return RDD containing serialized R objects.
-#' @seealso saveAsObjectFile
-#' @export
-#' @examples
-#'\dontrun{
-#'  sc <- sparkR.init()
-#'  rdd <- objectFile(sc, "myfile")
-#'}
+# Load an RDD saved as a SequenceFile containing serialized objects.
+#
+# The file to be loaded should be one that was previously generated by calling
+# saveAsObjectFile() of the RDD class.
+#
+# @param sc SparkContext to use
+# @param path Path of file to read. A vector of multiple paths is allowed.
+# @param minPartitions Minimum number of partitions to be created. If NULL, the default
+#  value is chosen based on available parallelism.
+# @return RDD containing serialized R objects.
+# @seealso saveAsObjectFile
+# @export
+# @examples
+#\dontrun{
+#  sc <- sparkR.init()
+#  rdd <- objectFile(sc, "myfile")
+#}
 objectFile <- function(sc, path, minPartitions = NULL) {
   # Allow the user to have a more flexible definiton of the text file path
   path <- suppressWarnings(normalizePath(path))
-  #' Convert a string vector of paths to a string containing comma separated paths
+  # Convert a string vector of paths to a string containing comma separated paths
   path <- paste(path, collapse = ",")
 
   jrdd <- callJMethod(sc, "objectFile", path, getMinPartitions(sc, minPartitions))
@@ -81,24 +81,24 @@ objectFile <- function(sc, path, minPartitions = NULL) {
   RDD(jrdd, "byte")
 }
 
-#' Create an RDD from a homogeneous list or vector.
-#'
-#' This function creates an RDD from a local homogeneous list in R. The elements
-#' in the list are split into \code{numSlices} slices and distributed to nodes
-#' in the cluster.
-#'
-#' @param sc SparkContext to use
-#' @param coll collection to parallelize
-#' @param numSlices number of partitions to create in the RDD
-#' @return an RDD created from this collection
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:10, 2)
-#' # The RDD should contain 10 elements
-#' length(rdd)
-#'}
+# Create an RDD from a homogeneous list or vector.
+#
+# This function creates an RDD from a local homogeneous list in R. The elements
+# in the list are split into \code{numSlices} slices and distributed to nodes
+# in the cluster.
+#
+# @param sc SparkContext to use
+# @param coll collection to parallelize
+# @param numSlices number of partitions to create in the RDD
+# @return an RDD created from this collection
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:10, 2)
+# # The RDD should contain 10 elements
+# length(rdd)
+#}
 parallelize <- function(sc, coll, numSlices = 1) {
   # TODO: bound/safeguard numSlices
   # TODO: unit tests for if the split works for all primitives
@@ -133,33 +133,33 @@ parallelize <- function(sc, coll, numSlices = 1) {
   RDD(jrdd, "byte")
 }
 
-#' Include this specified package on all workers
-#'
-#' This function can be used to include a package on all workers before the
-#' user's code is executed. This is useful in scenarios where other R package
-#' functions are used in a function passed to functions like \code{lapply}.
-#' NOTE: The package is assumed to be installed on every node in the Spark
-#' cluster.
-#'
-#' @param sc SparkContext to use
-#' @param pkg Package name
-#'
-#' @export
-#' @examples
-#'\dontrun{
-#'  library(Matrix)
-#'
-#'  sc <- sparkR.init()
-#'  # Include the matrix library we will be using
-#'  includePackage(sc, Matrix)
-#'
-#'  generateSparse <- function(x) {
-#'    sparseMatrix(i=c(1, 2, 3), j=c(1, 2, 3), x=c(1, 2, 3))
-#'  }
-#'
-#'  rdd <- lapplyPartition(parallelize(sc, 1:2, 2L), generateSparse)
-#'  collect(rdd)
-#'}
+# Include this specified package on all workers
+#
+# This function can be used to include a package on all workers before the
+# user's code is executed. This is useful in scenarios where other R package
+# functions are used in a function passed to functions like \code{lapply}.
+# NOTE: The package is assumed to be installed on every node in the Spark
+# cluster.
+#
+# @param sc SparkContext to use
+# @param pkg Package name
+#
+# @export
+# @examples
+#\dontrun{
+#  library(Matrix)
+#
+#  sc <- sparkR.init()
+#  # Include the matrix library we will be using
+#  includePackage(sc, Matrix)
+#
+#  generateSparse <- function(x) {
+#    sparseMatrix(i=c(1, 2, 3), j=c(1, 2, 3), x=c(1, 2, 3))
+#  }
+#
+#  rdd <- lapplyPartition(parallelize(sc, 1:2, 2L), generateSparse)
+#  collect(rdd)
+#}
 includePackage <- function(sc, pkg) {
   pkg <- as.character(substitute(pkg))
   if (exists(".packages", .sparkREnv)) {
@@ -171,30 +171,30 @@ includePackage <- function(sc, pkg) {
   .sparkREnv$.packages <- packages
 }
 
-#' @title Broadcast a variable to all workers
-#'
-#' @description
-#' Broadcast a read-only variable to the cluster, returning a \code{Broadcast}
-#' object for reading it in distributed functions.
-#'
-#' @param sc Spark Context to use
-#' @param object Object to be broadcast
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' rdd <- parallelize(sc, 1:2, 2L)
-#'
-#' # Large Matrix object that we want to broadcast
-#' randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000))
-#' randomMatBr <- broadcast(sc, randomMat)
-#'
-#' # Use the broadcast variable inside the function
-#' useBroadcast <- function(x) {
-#'   sum(value(randomMatBr) * x)
-#' }
-#' sumRDD <- lapply(rdd, useBroadcast)
-#'}
+# @title Broadcast a variable to all workers
+#
+# @description
+# Broadcast a read-only variable to the cluster, returning a \code{Broadcast}
+# object for reading it in distributed functions.
+#
+# @param sc Spark Context to use
+# @param object Object to be broadcast
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# rdd <- parallelize(sc, 1:2, 2L)
+#
+# # Large Matrix object that we want to broadcast
+# randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000))
+# randomMatBr <- broadcast(sc, randomMat)
+#
+# # Use the broadcast variable inside the function
+# useBroadcast <- function(x) {
+#   sum(value(randomMatBr) * x)
+# }
+# sumRDD <- lapply(rdd, useBroadcast)
+#}
 broadcast <- function(sc, object) {
   objName <- as.character(substitute(object))
   serializedObj <- serialize(object, connection = NULL)
@@ -205,21 +205,21 @@ broadcast <- function(sc, object) {
   Broadcast(id, object, jBroadcast, objName)
 }
 
-#' @title Set the checkpoint directory
-#'
-#' Set the directory under which RDDs are going to be checkpointed. The
-#' directory must be a HDFS path if running on a cluster.
-#'
-#' @param sc Spark Context to use
-#' @param dirName Directory path
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' setCheckpointDir(sc, "~/checkpoint")
-#' rdd <- parallelize(sc, 1:2, 2L)
-#' checkpoint(rdd)
-#'}
+# @title Set the checkpoint directory
+#
+# Set the directory under which RDDs are going to be checkpointed. The
+# directory must be a HDFS path if running on a cluster.
+#
+# @param sc Spark Context to use
+# @param dirName Directory path
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# setCheckpointDir(sc, "~/checkpoint")
+# rdd <- parallelize(sc, 1:2, 2L)
+# checkpoint(rdd)
+#}
 setCheckpointDir <- function(sc, dirName) {
   invisible(callJMethod(sc, "setCheckpointDir", suppressWarnings(normalizePath(dirName))))
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[2/2] spark git commit: [SPARK-6824] Fill the docs for DataFrame API in SparkR

Reply via email to