Repository: spark
Updated Branches:
  refs/heads/branch-2.0 01a4d69f3 -> b65b041af


[SPARK-16508][SPARKR] doc updates and more CRAN check fixes

Replace backtick-quoted names in code docs with `\code{thing}`
Remove the added `...` argument from drop(SparkDataFrame)
Fix remaining CRAN check warnings

Tested by creating the docs with knitr.

cc junyangq

Author: Felix Cheung <felixcheun...@hotmail.com>

Closes #14734 from felixcheung/rdoccleanup.

(cherry picked from commit 71afeeea4ec8e67edc95b5d504c557c88a2598b9)
Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
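
For context, most of the diff below is mechanical: backticks are plain text in Rd/roxygen output, so the docs switch to \code{} markup. A minimal before/after sketch, taken from the isLocal doc changed below:

  # before: backticks render literally in the generated Rd help page
  #' Returns True if the `collect` and `take` methods can be run locally

  # after: \code{} is the Rd markup for inline code
  #' Returns True if the \code{collect} and \code{take} methods can be run locally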


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b65b041a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b65b041a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b65b041a

Branch: refs/heads/branch-2.0
Commit: b65b041af8b64413c7d460d4ea110b2044d6f36e
Parents: 01a4d69
Author: Felix Cheung <felixcheun...@hotmail.com>
Authored: Mon Aug 22 15:53:10 2016 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Mon Aug 22 16:17:18 2016 -0700

----------------------------------------------------------------------
 R/pkg/NAMESPACE      |  6 ++++-
 R/pkg/R/DataFrame.R  | 69 +++++++++++++++++++++++------------------------
 R/pkg/R/RDD.R        | 10 +++----
 R/pkg/R/SQLContext.R | 30 ++++++++++-----------
 R/pkg/R/WindowSpec.R | 23 ++++++++--------
 R/pkg/R/column.R     |  2 +-
 R/pkg/R/functions.R  | 36 ++++++++++++-------------
 R/pkg/R/generics.R   | 14 +++++-----
 R/pkg/R/group.R      |  1 +
 R/pkg/R/mllib.R      |  5 ++--
 R/pkg/R/pairRDD.R    |  6 ++---
 R/pkg/R/stats.R      | 14 +++++-----
 12 files changed, 110 insertions(+), 106 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b65b041a/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index aaab92f..cdb8834 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -1,5 +1,9 @@
 # Imports from base R
-importFrom(methods, setGeneric, setMethod, setOldClass)
+# Do not include stats:: "rpois", "runif" - causes error at runtime
+importFrom("methods", "setGeneric", "setMethod", "setOldClass")
+importFrom("methods", "is", "new", "signature", "show")
+importFrom("stats", "gaussian", "setNames")
+importFrom("utils", "download.file", "packageVersion", "untar")
 
 # Disable native libraries till we figure out how to package it
 # See SPARKR-7839
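
The new importFrom() entries declare the base-package functions SparkR calls, which is what R CMD check flags otherwise ("no visible global function definition"). A rough illustration of the pattern; the function and file names here are hypothetical, not the actual SparkR sources:

  # In NAMESPACE:
  #   importFrom("utils", "download.file", "untar")
  #
  # In package code (illustrative only):
  fetchAndExtract <- function(url, destDir) {
    tarball <- file.path(destDir, basename(url))
    download.file(url, destfile = tarball, mode = "wb")   # from utils
    untar(tarball, exdir = destDir)                       # from utils
  }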

http://git-wip-us.apache.org/repos/asf/spark/blob/b65b041a/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 0266939..f8a05c6 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -150,7 +150,7 @@ setMethod("explain",
 
 #' isLocal
 #'
-#' Returns True if the `collect` and `take` methods can be run locally
+#' Returns True if the \code{collect} and \code{take} methods can be run locally
 #' (without any Spark executors).
 #'
 #' @param x A SparkDataFrame
@@ -635,10 +635,10 @@ setMethod("unpersist",
 #' The following options for repartition are possible:
 #' \itemize{
 #'  \item{1.} {Return a new SparkDataFrame partitioned by
-#'                      the given columns into `numPartitions`.}
-#'  \item{2.} {Return a new SparkDataFrame that has exactly `numPartitions`.}
+#'                      the given columns into \code{numPartitions}.}
+#'  \item{2.} {Return a new SparkDataFrame that has exactly \code{numPartitions}.}
 #'  \item{3.} {Return a new SparkDataFrame partitioned by the given column(s),
-#'                      using `spark.sql.shuffle.partitions` as number of partitions.}
+#'                      using \code{spark.sql.shuffle.partitions} as number of partitions.}
 #'}
 #' @param x a SparkDataFrame.
 #' @param numPartitions the number of partitions to use.
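
For reference, the three repartition() forms documented above would be called roughly like this (the data frame and column are hypothetical):

  df2 <- repartition(df, numPartitions = 8L)   # option 2: exact number of partitions
  df3 <- repartition(df, col = df$region)      # option 3: by column, using spark.sql.shuffle.partitions
  df4 <- repartition(df, 8L, df$region)        # option 1: by column into 8 partitions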
@@ -1125,9 +1125,8 @@ setMethod("take",
 
 #' Head
 #'
-#' Return the first NUM rows of a SparkDataFrame as a R data.frame. If NUM is NULL,
-#' then head() returns the first 6 rows in keeping with the current data.frame
-#' convention in R.
+#' Return the first \code{num} rows of a SparkDataFrame as a R data.frame. If \code{num} is not
+#' specified, then head() returns the first 6 rows as with R data.frame.
 #'
 #' @param x a SparkDataFrame.
 #' @param num the number of rows to return. Default is 6.
@@ -1399,11 +1398,11 @@ setMethod("dapplyCollect",
 #'
 #' @param cols grouping columns.
 #' @param func a function to be applied to each group partition specified by grouping
-#'             column of the SparkDataFrame. The function `func` takes as argument
+#'             column of the SparkDataFrame. The function \code{func} takes as argument
 #'             a key - grouping columns and a data frame - a local R data.frame.
-#'             The output of `func` is a local R data.frame.
+#'             The output of \code{func} is a local R data.frame.
 #' @param schema the schema of the resulting SparkDataFrame after the function is applied.
-#'               The schema must match to output of `func`. It has to be defined for each
+#'               The schema must match to output of \code{func}. It has to be defined for each
 #'               output column with preferred output column name and corresponding data type.
 #' @return A SparkDataFrame.
 #' @family SparkDataFrame functions
@@ -1490,9 +1489,9 @@ setMethod("gapply",
 #'
 #' @param cols grouping columns.
 #' @param func a function to be applied to each group partition specified by grouping
-#'             column of the SparkDataFrame. The function `func` takes as argument
+#'             column of the SparkDataFrame. The function \code{func} takes as argument
 #'             a key - grouping columns and a data frame - a local R data.frame.
-#'             The output of `func` is a local R data.frame.
+#'             The output of \code{func} is a local R data.frame.
 #' @return A data.frame.
 #' @family SparkDataFrame functions
 #' @aliases gapplyCollect,SparkDataFrame-method
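
A minimal sketch of the gapply() contract described above -- func receives the grouping key and a local R data.frame, and the declared schema must match what func returns (column and data frame names are made up):

  schema <- structType(structField("dept", "string"),
                       structField("avg_salary", "double"))
  result <- gapply(df, "dept",
                   function(key, x) {
                     # key: the grouping value(s); x: a local data.frame for that group
                     data.frame(key, avg_salary = mean(x$salary))
                   },
                   schema)
  head(collect(result))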
@@ -1650,7 +1649,7 @@ setMethod("$", signature(x = "SparkDataFrame"),
             getColumn(x, name)
           })
 
-#' @param value a Column or NULL. If NULL, the specified Column is dropped.
+#' @param value a Column or \code{NULL}. If \code{NULL}, the specified Column is dropped.
 #' @rdname select
 #' @name $<-
 #' @aliases $<-,SparkDataFrame-method
@@ -1740,7 +1739,7 @@ setMethod("[", signature(x = "SparkDataFrame"),
 #' @family subsetting functions
 #' @examples
 #' \dontrun{
-#'   # Columns can be selected using `[[` and `[`
+#'   # Columns can be selected using [[ and [
 #'   df[[2]] == df[["age"]]
 #'   df[,2] == df[,"age"]
 #'   df[,c("name", "age")]
@@ -1785,7 +1784,7 @@ setMethod("subset", signature(x = "SparkDataFrame"),
 #'   select(df, df$name, df$age + 1)
 #'   select(df, c("col1", "col2"))
 #'   select(df, list(df$name, df$age + 1))
-#'   # Similar to R data frames columns can also be selected using `$`
+#'   # Similar to R data frames columns can also be selected using $
 #'   df[,df$age]
 #' }
 #' @note select(SparkDataFrame, character) since 1.4.0
@@ -2436,7 +2435,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
 #' Return a new SparkDataFrame containing the union of rows
 #'
 #' Return a new SparkDataFrame containing the union of rows in this SparkDataFrame
-#' and another SparkDataFrame. This is equivalent to `UNION ALL` in SQL.
+#' and another SparkDataFrame. This is equivalent to \code{UNION ALL} in SQL.
 #' Note that this does not remove duplicate rows across the two SparkDataFrames.
 #'
 #' @param x A SparkDataFrame
@@ -2479,7 +2478,7 @@ setMethod("unionAll",
 
 #' Union two or more SparkDataFrames
 #'
-#' Union two or more SparkDataFrames. This is equivalent to `UNION ALL` in SQL.
+#' Union two or more SparkDataFrames. This is equivalent to \code{UNION ALL} in SQL.
 #' Note that this does not remove duplicate rows across the two SparkDataFrames.
 #'
 #' @param x a SparkDataFrame.
@@ -2512,7 +2511,7 @@ setMethod("rbind",
 #' Intersect
 #'
 #' Return a new SparkDataFrame containing rows only in both this SparkDataFrame
-#' and another SparkDataFrame. This is equivalent to `INTERSECT` in SQL.
+#' and another SparkDataFrame. This is equivalent to \code{INTERSECT} in SQL.
 #'
 #' @param x A SparkDataFrame
 #' @param y A SparkDataFrame
@@ -2540,7 +2539,7 @@ setMethod("intersect",
 #' except
 #'
 #' Return a new SparkDataFrame containing rows in this SparkDataFrame
-#' but not in another SparkDataFrame. This is equivalent to `EXCEPT` in SQL.
+#' but not in another SparkDataFrame. This is equivalent to \code{EXCEPT} in SQL.
 #'
 #' @param x a SparkDataFrame.
 #' @param y a SparkDataFrame.
@@ -2569,8 +2568,8 @@ setMethod("except",
 
 #' Save the contents of SparkDataFrame to a data source.
 #'
-#' The data source is specified by the `source` and a set of options (...).
-#' If `source` is not specified, the default data source configured by
+#' The data source is specified by the \code{source} and a set of options (...).
+#' If \code{source} is not specified, the default data source configured by
 #' spark.sql.sources.default will be used.
 #'
 #' Additionally, mode is used to specify the behavior of the save operation when data already
@@ -2606,7 +2605,7 @@ setMethod("except",
 #' @note write.df since 1.4.0
 setMethod("write.df",
           signature(df = "SparkDataFrame", path = "character"),
-          function(df, path, source = NULL, mode = "error", ...){
+          function(df, path, source = NULL, mode = "error", ...) {
             if (is.null(source)) {
               source <- getDefaultSqlSource()
             }
@@ -2628,14 +2627,14 @@ setMethod("write.df",
 #' @note saveDF since 1.4.0
 setMethod("saveDF",
           signature(df = "SparkDataFrame", path = "character"),
-          function(df, path, source = NULL, mode = "error", ...){
+          function(df, path, source = NULL, mode = "error", ...) {
             write.df(df, path, source, mode, ...)
           })
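
Typical calls to the two writers documented above, if the default data source is not what you want (paths are placeholders):

  write.df(df, path = "/tmp/people_parquet", source = "parquet", mode = "overwrite")
  saveDF(df, path = "/tmp/people_json", source = "json", mode = "error")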
 
 #' Save the contents of the SparkDataFrame to a data source as a table
 #'
-#' The data source is specified by the `source` and a set of options (...).
-#' If `source` is not specified, the default data source configured by
+#' The data source is specified by the \code{source} and a set of options (...).
+#' If \code{source} is not specified, the default data source configured by
 #' spark.sql.sources.default will be used.
 #'
 #' Additionally, mode is used to specify the behavior of the save operation when
@@ -2668,7 +2667,7 @@ setMethod("saveDF",
 #' @note saveAsTable since 1.4.0
 setMethod("saveAsTable",
           signature(df = "SparkDataFrame", tableName = "character"),
-          function(df, tableName, source = NULL, mode="error", ...){
+          function(df, tableName, source = NULL, mode="error", ...) {
             if (is.null(source)) {
               source <- getDefaultSqlSource()
             }
@@ -2745,11 +2744,11 @@ setMethod("summary",
 #' @param how "any" or "all".
 #'            if "any", drop a row if it contains any nulls.
 #'            if "all", drop a row only if all its values are null.
-#'            if minNonNulls is specified, how is ignored.
+#'            if \code{minNonNulls} is specified, how is ignored.
 #' @param minNonNulls if specified, drop rows that have less than
-#'                    minNonNulls non-null values.
+#'                    \code{minNonNulls} non-null values.
 #'                    This overwrites the how parameter.
-#' @param cols optional list of column names to consider. In `fillna`,
+#' @param cols optional list of column names to consider. In \code{fillna},
 #'             columns specified in cols that do not have matching data
 #'             type are ignored. For example, if value is a character, and
 #'             subset contains a non-character column, then the non-character
@@ -2872,8 +2871,8 @@ setMethod("fillna",
 #' in your system to accommodate the contents.
 #'
 #' @param x a SparkDataFrame.
-#' @param row.names NULL or a character vector giving the row names for the data frame.
-#' @param optional If `TRUE`, converting column names is optional.
+#' @param row.names \code{NULL} or a character vector giving the row names for the data frame.
+#' @param optional If \code{TRUE}, converting column names is optional.
 #' @param ... additional arguments to pass to base::as.data.frame.
 #' @return A data.frame.
 #' @family SparkDataFrame functions
@@ -3051,7 +3050,7 @@ setMethod("str",
 #' @note drop since 2.0.0
 setMethod("drop",
           signature(x = "SparkDataFrame"),
-          function(x, col, ...) {
+          function(x, col) {
             stopifnot(class(col) == "character" || class(col) == "Column")
 
             if (class(col) == "Column") {
@@ -3211,8 +3210,8 @@ setMethod("histogram",
 #'         and to not change the existing data.
 #' }
 #'
-#' @param x s SparkDataFrame.
-#' @param url JDBC database url of the form `jdbc:subprotocol:subname`.
+#' @param x a SparkDataFrame.
+#' @param url JDBC database url of the form \code{jdbc:subprotocol:subname}.
 #' @param tableName yhe name of the table in the external database.
 #' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default).
 #' @param ... additional JDBC database connection properties.
@@ -3230,7 +3229,7 @@ setMethod("histogram",
 #' @note write.jdbc since 2.0.0
 setMethod("write.jdbc",
           signature(x = "SparkDataFrame", url = "character", tableName = 
"character"),
-          function(x, url, tableName, mode = "error", ...){
+          function(x, url, tableName, mode = "error", ...) {
             jmode <- convertToJSaveMode(mode)
             jprops <- varargsToJProperties(...)
             write <- callJMethod(x@sdf, "write")
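
And a hedged example of write.jdbc() matching the corrected parameter docs above (connection details are placeholders; extra named arguments become JDBC connection properties):

  write.jdbc(df, url = "jdbc:postgresql://dbhost:5432/mydb",
             tableName = "people", mode = "append",
             user = "spark", password = "secret")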

http://git-wip-us.apache.org/repos/asf/spark/blob/b65b041a/R/pkg/R/RDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index 6b254bb..6cd0704 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -887,17 +887,17 @@ setMethod("sampleRDD",
 
               # Discards some random values to ensure each partition has a
               # different random seed.
-              runif(partIndex)
+              stats::runif(partIndex)
 
               for (elem in part) {
                 if (withReplacement) {
-                  count <- rpois(1, fraction)
+                  count <- stats::rpois(1, fraction)
                   if (count > 0) {
                     res[ (len + 1) : (len + count) ] <- rep(list(elem), count)
                     len <- len + count
                   }
                 } else {
-                  if (runif(1) < fraction) {
+                  if (stats::runif(1) < fraction) {
                     len <- len + 1
                     res[[len]] <- elem
                   }
@@ -965,7 +965,7 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical",
 
             set.seed(seed)
             samples <- collectRDD(sampleRDD(x, withReplacement, fraction,
-                                         as.integer(ceiling(runif(1,
+                                         as.integer(ceiling(stats::runif(1,
                                                                   -MAXINT,
                                                                   MAXINT)))))
             # If the first sample didn't turn out large enough, keep trying to
@@ -973,7 +973,7 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical",
             # multiplier for thei initial size
             while (length(samples) < total)
               samples <- collectRDD(sampleRDD(x, withReplacement, fraction,
-                                           as.integer(ceiling(runif(1,
+                                           as.integer(ceiling(stats::runif(1,
                                                                     -MAXINT,
                                                                     MAXINT)))))
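
The stats:: qualification above matches the NAMESPACE comment: rather than importing rpois/runif (which caused an error at runtime), the RNG helpers are called fully qualified. The same pattern in a stripped-down sketch, not the actual SparkR code:

  samplePartition <- function(part, fraction) {
    # keep each element with probability `fraction`
    Filter(function(elem) stats::runif(1) < fraction, part)
  }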
 

http://git-wip-us.apache.org/repos/asf/spark/blob/b65b041a/R/pkg/R/SQLContext.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index a9cd2d8..572e71e 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -115,7 +115,7 @@ infer_type <- function(x) {
 #' Get Runtime Config from the current active SparkSession
 #'
 #' Get Runtime Config from the current active SparkSession.
-#' To change SparkSession Runtime Config, please see `sparkR.session()`.
+#' To change SparkSession Runtime Config, please see \code{sparkR.session()}.
 #'
 #' @param key (optional) The key of the config to get, if omitted, all config is returned
 #' @param defaultValue (optional) The default value of the config to return if they config is not
@@ -720,11 +720,11 @@ dropTempView <- function(viewName) {
 #'
 #' Returns the dataset in a data source as a SparkDataFrame
 #'
-#' The data source is specified by the `source` and a set of options(...).
-#' If `source` is not specified, the default data source configured by
+#' The data source is specified by the \code{source} and a set of options(...).
+#' If \code{source} is not specified, the default data source configured by
 #' "spark.sql.sources.default" will be used. \cr
-#' Similar to R read.csv, when `source` is "csv", by default, a value of "NA" will be interpreted
-#' as NA.
+#' Similar to R read.csv, when \code{source} is "csv", by default, a value of "NA" will be
+#' interpreted as NA.
 #'
 #' @param path The path of files to load
 #' @param source The name of external data source
@@ -791,8 +791,8 @@ loadDF <- function(x, ...) {
 #' Creates an external table based on the dataset in a data source,
 #' Returns a SparkDataFrame associated with the external table.
 #'
-#' The data source is specified by the `source` and a set of options(...).
-#' If `source` is not specified, the default data source configured by
+#' The data source is specified by the \code{source} and a set of options(...).
+#' If \code{source} is not specified, the default data source configured by
 #' "spark.sql.sources.default" will be used.
 #'
 #' @param tableName a name of the table.
@@ -830,22 +830,22 @@ createExternalTable <- function(x, ...) {
 #' Additional JDBC database connection properties can be set (...)
 #'
 #' Only one of partitionColumn or predicates should be set. Partitions of the table will be
-#' retrieved in parallel based on the `numPartitions` or by the predicates.
+#' retrieved in parallel based on the \code{numPartitions} or by the predicates.
 #'
 #' Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash
 #' your external database systems.
 #'
-#' @param url JDBC database url of the form `jdbc:subprotocol:subname`
+#' @param url JDBC database url of the form \code{jdbc:subprotocol:subname}
 #' @param tableName the name of the table in the external database
 #' @param partitionColumn the name of a column of integral type that will be used for partitioning
-#' @param lowerBound the minimum value of `partitionColumn` used to decide partition stride
-#' @param upperBound the maximum value of `partitionColumn` used to decide partition stride
-#' @param numPartitions the number of partitions, This, along with `lowerBound` (inclusive),
-#'                      `upperBound` (exclusive), form partition strides for generated WHERE
-#'                      clause expressions used to split the column `partitionColumn` evenly.
+#' @param lowerBound the minimum value of \code{partitionColumn} used to decide partition stride
+#' @param upperBound the maximum value of \code{partitionColumn} used to decide partition stride
+#' @param numPartitions the number of partitions, This, along with \code{lowerBound} (inclusive),
+#'                      \code{upperBound} (exclusive), form partition strides for generated WHERE
+#'                      clause expressions used to split the column \code{partitionColumn} evenly.
 #'                      This defaults to SparkContext.defaultParallelism when unset.
 #' @param predicates a list of conditions in the where clause; each one defines one partition
-#' @param ... additional JDBC database connection named propertie(s).
+#' @param ... additional JDBC database connection named properties.
 #' @return SparkDataFrame
 #' @rdname read.jdbc
 #' @name read.jdbc
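
For reference, the two ways of partitioning a JDBC read that the parameters above describe (connection string, table and column are placeholders):

  # stride-based: split partitionColumn into numPartitions ranges between the bounds
  df <- read.jdbc("jdbc:mysql://dbhost:3306/shop", "orders",
                  partitionColumn = "order_id", lowerBound = 1, upperBound = 1000000,
                  numPartitions = 10, user = "spark", password = "secret")

  # predicate-based: one partition per WHERE-clause condition
  df <- read.jdbc("jdbc:mysql://dbhost:3306/shop", "orders",
                  predicates = list("region = 'EU'", "region = 'US'"))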

http://git-wip-us.apache.org/repos/asf/spark/blob/b65b041a/R/pkg/R/WindowSpec.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/WindowSpec.R b/R/pkg/R/WindowSpec.R
index b55356b..ddd2ef2 100644
--- a/R/pkg/R/WindowSpec.R
+++ b/R/pkg/R/WindowSpec.R
@@ -44,6 +44,7 @@ windowSpec <- function(sws) {
 }
 
 #' @rdname show
+#' @export
 #' @note show(WindowSpec) since 2.0.0
 setMethod("show", "WindowSpec",
           function(object) {
@@ -125,11 +126,11 @@ setMethod("orderBy",
 
 #' rowsBetween
 #'
-#' Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive).
+#' Defines the frame boundaries, from \code{start} (inclusive) to \code{end} (inclusive).
 #'
-#' Both `start` and `end` are relative positions from the current row. For example, "0" means
-#' "current row", while "-1" means the row before the current row, and "5" means the fifth row
-#' after the current row.
+#' Both \code{start} and \code{end} are relative positions from the current row. For example,
+#' "0" means "current row", while "-1" means the row before the current row, and "5" means the
+#' fifth row after the current row.
 #'
 #' @param x a WindowSpec
 #' @param start boundary start, inclusive.
@@ -157,12 +158,12 @@ setMethod("rowsBetween",
 
 #' rangeBetween
 #'
-#' Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive).
+#' Defines the frame boundaries, from \code{start} (inclusive) to \code{end} (inclusive).
+#'
+#' Both \code{start} and \code{end} are relative from the current row. For example, "0" means
+#' "current row", while "-1" means one off before the current row, and "5" means the five off
+#' after the current row.
 #'
-#' Both `start` and `end` are relative from the current row. For example, "0" means "current row",
-#' while "-1" means one off before the current row, and "5" means the five off after the
-#' current row.
-
 #' @param x a WindowSpec
 #' @param start boundary start, inclusive.
 #'              The frame is unbounded if this is the minimum long value.
@@ -195,8 +196,8 @@ setMethod("rangeBetween",
 #' Define a windowing column.
 #'
 #' @param x a Column, usually one returned by window function(s).
-#' @param window a WindowSpec object. Can be created by `windowPartitionBy` or
-#'        `windowOrderBy` and configured by other WindowSpec methods.
+#' @param window a WindowSpec object. Can be created by \code{windowPartitionBy} or
+#'        \code{windowOrderBy} and configured by other WindowSpec methods.
 #' @rdname over
 #' @name over
 #' @aliases over,Column,WindowSpec-method
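
A small sketch tying the WindowSpec pieces above together (columns are hypothetical):

  ws  <- rowsBetween(orderBy(windowPartitionBy("dept"), "salary"), -1, 1)
  df2 <- withColumn(df, "movingAvg", over(avg(df$salary), ws))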

http://git-wip-us.apache.org/repos/asf/spark/blob/b65b041a/R/pkg/R/column.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index af486e1..539d91b 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -284,7 +284,7 @@ setMethod("%in%",
 #' otherwise
 #'
 #' If values in the specified column are null, returns the value.
-#' Can be used in conjunction with `when` to specify a default value for expressions.
+#' Can be used in conjunction with \code{when} to specify a default value for expressions.
 #'
 #' @param x a Column.
 #' @param value value to replace when the corresponding entry in \code{x} is NA.
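
The when/otherwise pair documented above, in a minimal hypothetical example:

  status <- otherwise(when(df$age >= 18, "adult"), "minor")
  df2    <- withColumn(df, "status", status)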

http://git-wip-us.apache.org/repos/asf/spark/blob/b65b041a/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index b3c10de..f042add 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1250,7 +1250,7 @@ setMethod("rint",
 
 #' round
 #'
-#' Returns the value of the column `e` rounded to 0 decimal places using HALF_UP rounding mode.
+#' Returns the value of the column \code{e} rounded to 0 decimal places using HALF_UP rounding mode.
 #'
 #' @param x Column to compute on.
 #'
@@ -1974,7 +1974,7 @@ setMethod("atan2", signature(y = "Column"),
 
 #' datediff
 #'
-#' Returns the number of days from `start` to `end`.
+#' Returns the number of days from \code{start} to \code{end}.
 #'
 #' @param x start Column to use.
 #' @param y end Column to use.
@@ -2043,7 +2043,7 @@ setMethod("levenshtein", signature(y = "Column"),
 
 #' months_between
 #'
-#' Returns number of months between dates `date1` and `date2`.
+#' Returns number of months between dates \code{date1} and \code{date2}.
 #'
 #' @param x start Column to use.
 #' @param y end Column to use.
@@ -2430,7 +2430,7 @@ setMethod("add_months", signature(y = "Column", x = "numeric"),
 
 #' date_add
 #'
-#' Returns the date that is `days` days after `start`
+#' Returns the date that is \code{x} days after
 #'
 #' @param y Column to compute on
 #' @param x Number of days to add
@@ -2450,7 +2450,7 @@ setMethod("date_add", signature(y = "Column", x = "numeric"),
 
 #' date_sub
 #'
-#' Returns the date that is `days` days before `start`
+#' Returns the date that is \code{x} days before
 #'
 #' @param y Column to compute on
 #' @param x Number of days to substract
@@ -3113,7 +3113,7 @@ setMethod("ifelse",
 #'   N = total number of rows in the partition
 #'   cume_dist(x) = number of values before (and including) x / N
 #'
-#' This is equivalent to the CUME_DIST function in SQL.
+#' This is equivalent to the \code{CUME_DIST} function in SQL.
 #'
 #' @rdname cume_dist
 #' @name cume_dist
@@ -3141,7 +3141,7 @@ setMethod("cume_dist",
 #' and had three people tie for second place, you would say that all three were in second
 #' place and that the next person came in third.
 #'
-#' This is equivalent to the DENSE_RANK function in SQL.
+#' This is equivalent to the \code{DENSE_RANK} function in SQL.
 #'
 #' @rdname dense_rank
 #' @name dense_rank
@@ -3159,11 +3159,11 @@ setMethod("dense_rank",
 
 #' lag
 #'
-#' Window function: returns the value that is `offset` rows before the current row, and
-#' `defaultValue` if there is less than `offset` rows before the current row. For example,
-#' an `offset` of one will return the previous row at any given point in the window partition.
+#' Window function: returns the value that is \code{offset} rows before the current row, and
+#' \code{defaultValue} if there is less than \code{offset} rows before the current row. For example,
+#' an \code{offset} of one will return the previous row at any given point in the window partition.
 #'
-#' This is equivalent to the LAG function in SQL.
+#' This is equivalent to the \code{LAG} function in SQL.
 #'
 #' @param x the column as a character string or a Column to compute on.
 #' @param offset the number of rows back from the current row from which to obtain a value.
@@ -3193,11 +3193,11 @@ setMethod("lag",
 
 #' lead
 #'
-#' Window function: returns the value that is `offset` rows after the current row, and
-#' `null` if there is less than `offset` rows after the current row. For example,
-#' an `offset` of one will return the next row at any given point in the window partition.
+#' Window function: returns the value that is \code{offset} rows after the current row, and
+#' NULL if there is less than \code{offset} rows after the current row. For example,
+#' an \code{offset} of one will return the next row at any given point in the window partition.
 #'
-#' This is equivalent to the LEAD function in SQL.
+#' This is equivalent to the \code{LEAD} function in SQL.
 #'
 #' @param x Column to compute on
 #' @param offset Number of rows to offset
@@ -3226,11 +3226,11 @@ setMethod("lead",
 
 #' ntile
 #'
-#' Window function: returns the ntile group id (from 1 to `n` inclusive) in an ordered window
-#' partition. For example, if `n` is 4, the first quarter of the rows will get value 1, the second
+#' Window function: returns the ntile group id (from 1 to n inclusive) in an ordered window
+#' partition. For example, if n is 4, the first quarter of the rows will get value 1, the second
 #' quarter will get 2, the third quarter will get 3, and the last quarter will get 4.
 #'
-#' This is equivalent to the NTILE function in SQL.
+#' This is equivalent to the \code{NTILE} function in SQL.
 #'
 #' @param x Number of ntile groups
 #'

http://git-wip-us.apache.org/repos/asf/spark/blob/b65b041a/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 70db7cb..b54a92a 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -438,17 +438,17 @@ setGeneric("columns", function(x) {standardGeneric("columns") })
 setGeneric("count", function(x) { standardGeneric("count") })
 
 #' @rdname cov
-#' @param x a Column object or a SparkDataFrame.
-#' @param ... additional argument(s). If `x` is a Column object, a Column object
-#'        should be provided. If `x` is a SparkDataFrame, two column names should
+#' @param x a Column or a SparkDataFrame.
+#' @param ... additional argument(s). If \code{x} is a Column, a Column
+#'        should be provided. If \code{x} is a SparkDataFrame, two column names should
 #'        be provided.
 #' @export
 setGeneric("cov", function(x, ...) {standardGeneric("cov") })
 
 #' @rdname corr
-#' @param x a Column object or a SparkDataFrame.
-#' @param ... additional argument(s). If `x` is a Column object, a Column object
-#'        should be provided. If `x` is a SparkDataFrame, two column names should
+#' @param x a Column or a SparkDataFrame.
+#' @param ... additional argument(s). If \code{x} is a Column, a Column
+#'        should be provided. If \code{x} is a SparkDataFrame, two column names should
 #'        be provided.
 #' @export
 setGeneric("corr", function(x, ...) {standardGeneric("corr") })
@@ -851,7 +851,7 @@ setGeneric("array_contains", function(x, value) { 
standardGeneric("array_contain
 setGeneric("ascii", function(x) { standardGeneric("ascii") })
 
 #' @param x Column to compute on or a GroupedData object.
-#' @param ... additional argument(s) when `x` is a GroupedData object.
+#' @param ... additional argument(s) when \code{x} is a GroupedData object.
 #' @rdname avg
 #' @export
 setGeneric("avg", function(x, ...) { standardGeneric("avg") })

http://git-wip-us.apache.org/repos/asf/spark/blob/b65b041a/R/pkg/R/group.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R
index 3c85ada..e3479ef 100644
--- a/R/pkg/R/group.R
+++ b/R/pkg/R/group.R
@@ -48,6 +48,7 @@ groupedData <- function(sgd) {
 
 #' @rdname show
 #' @aliases show,GroupedData-method
+#' @export
 #' @note show(GroupedData) since 1.4.0
 setMethod("show", "GroupedData",
           function(object) {

http://git-wip-us.apache.org/repos/asf/spark/blob/b65b041a/R/pkg/R/mllib.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 008d92f..25dcb3a 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -429,7 +429,7 @@ setMethod("predict", signature(object = "KMeansModel"),
 #' @rdname spark.naiveBayes
 #' @aliases spark.naiveBayes,SparkDataFrame,formula-method
 #' @name spark.naiveBayes
-#' @seealso e1071: \url{https://cran.r-project.org/web/packages/e1071/}
+#' @seealso e1071: \url{https://cran.r-project.org/package=e1071}
 #' @export
 #' @examples
 #' \dontrun{
@@ -575,7 +575,7 @@ read.ml <- function(path) {
 #'                Note that operator '.' is not supported currently.
 #' @return \code{spark.survreg} returns a fitted AFT survival regression model.
 #' @rdname spark.survreg
-#' @seealso survival: \url{https://cran.r-project.org/web/packages/survival/}
+#' @seealso survival: \url{https://cran.r-project.org/package=survival}
 #' @export
 #' @examples
 #' \dontrun{
@@ -604,7 +604,6 @@ setMethod("spark.survreg", signature(data = 
"SparkDataFrame", formula = "formula
             return(new("AFTSurvivalRegressionModel", jobj = jobj))
           })
 
-
 # Returns a summary of the AFT survival regression model produced by spark.survreg,
 # similarly to R's summary().
 

http://git-wip-us.apache.org/repos/asf/spark/blob/b65b041a/R/pkg/R/pairRDD.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R
index f0605db..4dee324 100644
--- a/R/pkg/R/pairRDD.R
+++ b/R/pkg/R/pairRDD.R
@@ -917,19 +917,19 @@ setMethod("sampleByKey",
               len <- 0
 
               # mixing because the initial seeds are close to each other
-              runif(10)
+              stats::runif(10)
 
               for (elem in part) {
                 if (elem[[1]] %in% names(fractions)) {
                   frac <- as.numeric(fractions[which(elem[[1]] == names(fractions))])
                   if (withReplacement) {
-                    count <- rpois(1, frac)
+                    count <- stats::rpois(1, frac)
                     if (count > 0) {
                       res[ (len + 1) : (len + count) ] <- rep(list(elem), count)
                       len <- len + count
                     }
                   } else {
-                    if (runif(1) < frac) {
+                    if (stats::runif(1) < frac) {
                       len <- len + 1
                       res[[len]] <- elem
                     }

http://git-wip-us.apache.org/repos/asf/spark/blob/b65b041a/R/pkg/R/stats.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R
index 8ea24d8..dcd7198 100644
--- a/R/pkg/R/stats.R
+++ b/R/pkg/R/stats.R
@@ -29,9 +29,9 @@ setOldClass("jobj")
 #' @param col1 name of the first column. Distinct items will make the first item of each row.
 #' @param col2 name of the second column. Distinct items will make the column names of the output.
 #' @return a local R data.frame representing the contingency table. The first column of each row
-#'         will be the distinct values of `col1` and the column names will be the distinct values
-#'         of `col2`. The name of the first column will be `$col1_$col2`. Pairs that have no
-#'         occurrences will have zero as their counts.
+#'         will be the distinct values of \code{col1} and the column names will be the distinct values
+#'         of \code{col2}. The name of the first column will be "\code{col1}_\code{col2}". Pairs
+#'         that have no occurrences will have zero as their counts.
 #'
 #' @rdname crosstab
 #' @name crosstab
@@ -116,7 +116,7 @@ setMethod("corr",
 #'
 #' @param x A SparkDataFrame.
 #' @param cols A vector column names to search frequent items in.
-#' @param support (Optional) The minimum frequency for an item to be considered `frequent`.
+#' @param support (Optional) The minimum frequency for an item to be considered \code{frequent}.
 #'                Should be greater than 1e-4. Default support = 0.01.
 #' @return a local R data.frame with the frequent items in each column
 #'
@@ -142,9 +142,9 @@ setMethod("freqItems", signature(x = "SparkDataFrame", cols 
= "character"),
 #'
 #' Calculates the approximate quantiles of a numerical column of a SparkDataFrame.
 #' The result of this algorithm has the following deterministic bound:
-#' If the SparkDataFrame has N elements and if we request the quantile at probability `p` up to
-#' error `err`, then the algorithm will return a sample `x` from the SparkDataFrame so that the
-#' *exact* rank of `x` is close to (p * N). More precisely,
+#' If the SparkDataFrame has N elements and if we request the quantile at probability p up to
+#' error err, then the algorithm will return a sample x from the SparkDataFrame so that the
+#' *exact* rank of x is close to (p * N). More precisely,
 #'   floor((p - err) * N) <= rank(x) <= ceil((p + err) * N).
 #' This method implements a variation of the Greenwald-Khanna algorithm (with some speed
 #' optimizations). The algorithm was first present in [[http://dx.doi.org/10.1145/375663.375670
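
approxQuantile() as documented above, in one hedged line (column name and error are illustrative):

  q <- approxQuantile(df, "salary", probabilities = c(0.25, 0.5, 0.75), relativeError = 0.01)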

